def test_factory(self):
    """Load a family and one of its languages from the fixture tree.

    Checks the values read from disk, then reassigns the writable
    attributes (coordinates, ISO code, hid, macroareas), verifies the
    parent/child links and finally writes the languoid info to a
    temporary location.
    """
    from pyglottolog.languoids import Languoid, Level

    family = Languoid.from_dir(self.tree.joinpath('abcd1234'))
    self.assertEqual(family.category, 'Family')

    lang_dir = self.tree.joinpath(family.id, 'abcd1235')
    lang = Languoid.from_dir(lang_dir)
    self.assertEqual(lang.name, 'language')
    self.assertEqual(lang.level, Level.language)

    # Coordinates as stored in the fixture, then after reassignment.
    self.assertAlmostEqual(lang.latitude, 0.5)
    self.assertAlmostEqual(lang.longitude, 0.5)
    lang.latitude = 1.0
    lang.longitude = 1.0
    self.assertAlmostEqual(lang.latitude, 1.0)
    self.assertAlmostEqual(lang.longitude, 1.0)

    # A value assigned through ``iso_code`` is visible through ``iso``.
    self.assertEqual(lang.iso_code, 'abc')
    lang.iso_code = 'cde'
    self.assertEqual(lang.iso, 'cde')

    # ``hid`` still holds 'abc' after the ISO code was changed.
    self.assertEqual(lang.hid, 'abc')
    lang.hid = 'abo'
    self.assertEqual(lang.hid, 'abo')
    self.assertEqual(lang.id, 'abcd1235')

    self.assertEqual(lang.macroareas, ['a', 'b'])
    lang.macroareas = ['a']
    self.assertEqual(lang.macroareas, ['a'])

    # Tree navigation in both directions.
    self.assertEqual(lang.parent, family)
    first_child = family.children[0]
    self.assertEqual(first_child, lang)
    self.assertEqual(lang.children[0].family, family)

    target = self.tmp_path('new')
    lang.write_info(target.as_posix())
def test_lang2tree(self):
    """Run ``lang2tree`` twice: into an empty tree, then with the first result as old tree.

    Both runs must leave an ``abcd1234.ini`` file below
    ``abcd1233/abcd1234`` in their output directory.
    """
    old_root = self.tmp_path('old')
    new_root = self.tmp_path('new')
    old_root.mkdir()
    new_root.mkdir()
    ini_parts = ('abcd1233', 'abcd1234', 'abcd1234.ini')

    # First run: no previously known languoids.
    lang2tree(
        Languoid.from_name_id_level('name', 'abcd1234', Level.language),
        [('parent', 'abcd1233', Level.family)],
        old_root,
        {})
    assert old_root.joinpath(*ini_parts).exists()

    # Second run: everything found in the first output tree counts as known.
    lang2tree(
        Languoid.from_name_id_level('name', 'abcd1234', Level.language),
        [('parent', 'abcd1233', Level.family)],
        new_root,
        {known.id: known for known in walk_tree(old_root)})
    assert new_root.joinpath(*ini_parts).exists()
def read_lff(level, fp=None, dry_run=False):
    """Yield one Languoid for every language line of an LFF-style file.

    Lines are right-stripped; lines starting with ``#`` and blank lines are
    skipped. Each remaining line is handled by the first rule that applies:

    * a language line (leading whitespace, name and id, then a bracketed
      three-letter code, a ``NOCODE_...`` code or empty brackets) yields
      ``Languoid.from_lff(...)`` for the current classification;
    * a line ending in ``[-isolate-]`` makes the following languages
      isolates (they are created with a classification of ``None``);
    * any other line must be a classification line (one or more
      comma-separated name-and-id items) and becomes the classification
      for the language lines that follow.

    :param level: a ``Level`` member; the first letter of its name selects \
    the default file ``<letter>ff.txt`` (located via ``build_path``).
    :param fp: optional open file-like object to read from instead of the \
    default file; it is used as a context manager, i.e. closed afterwards.
    :param dry_run: passed through to ``Languoid.from_lff``.
    :raises ValueError: for a line matching none of the three patterns.
    :raises AssertionError: if ``level`` is not a ``Level`` or a language \
    line appears before any classification line.
    """
    assert isinstance(level, Level)
    # Raw strings: the backslash sequences below are regex escapes, not
    # string escapes; non-raw literals trigger invalid-escape warnings.
    lang_line = re.compile(
        r"\s+" + NAME_AND_ID_REGEX + r"(\[([a-z]{3}|NOCODE\_[^\]]+)?\])$")
    class_line = re.compile(
        NAME_AND_ID_REGEX + r"(,\s*" + NAME_AND_ID_REGEX + r")*$")
    isolate_line = re.compile(r"([^\[]+)(\[-isolate-\])$")

    path = None
    with fp or build_path("%sff.txt" % level.name[0]).open(encoding="utf8") as fp:
        for line in fp:
            line = line.rstrip()
            if line.startswith("#") or not line.strip():
                # ignore comments or empty lines
                continue
            if lang_line.match(line):
                # A language needs a preceding classification/isolate line.
                assert path
                yield Languoid.from_lff(
                    None if path == "isolate" else path,
                    line.strip(),
                    level,
                    dry_run=dry_run)
            elif isolate_line.match(line):
                path = "isolate"
            elif class_line.match(line):
                path = line.strip()
            else:
                # Neither language, isolate marker nor classification line.
                raise ValueError(line)
def read_lff(level, fp=None):
    """Yield one Languoid for every language line of an LFF-style file.

    Lines are right-stripped; lines starting with ``#`` and blank lines are
    skipped. Each remaining line is handled by the first rule that applies:

    * a language line (leading whitespace, name and id, then a bracketed
      three-letter code or empty brackets) yields ``Languoid.from_lff(...)``
      for the current classification;
    * a line ending in ``[-isolate-]`` makes the following languages
      isolates (they are created with a classification of ``None``);
    * any other line must be a classification line (one or more
      comma-separated name-and-id items) and becomes the classification
      for the language lines that follow.

    :param level: indexable level designation; its first item selects the \
    default file ``<item>ff.txt`` (located via ``build_path``) and the whole \
    value is passed on to ``Languoid.from_lff``.
    :param fp: optional open file-like object to read from instead of the \
    default file; it is used as a context manager, i.e. closed afterwards.
    :raises ValueError: for a line matching none of the three patterns.
    :raises AssertionError: if a language line appears before any \
    classification line.
    """
    # Raw strings: the backslash sequences below are regex escapes, not
    # string escapes; non-raw literals trigger invalid-escape warnings.
    lang_line = re.compile(
        r'\s+' + NAME_AND_ID_REGEX + r'(\[([a-z]{3})?\])$')
    class_line = re.compile(
        NAME_AND_ID_REGEX + r'(,\s*' + NAME_AND_ID_REGEX + r')*$')
    isolate_line = re.compile(r'([^\[]+)(\[-isolate-\])$')

    path = None
    with fp or build_path('%sff.txt' % level[0]).open(encoding='utf8') as fp:
        for line in fp:
            line = line.rstrip()
            if line.startswith('#') or not line.strip():
                # ignore comments or empty lines
                continue
            if lang_line.match(line):
                # A language needs a preceding classification/isolate line.
                assert path
                yield Languoid.from_lff(
                    None if path == 'isolate' else path, line.strip(), level)
            elif isolate_line.match(line):
                path = 'isolate'
            elif class_line.match(line):
                path = line.strip()
            else:
                # Neither language, isolate marker nor classification line.
                raise ValueError(line)
def read_lff(level, fp=None, dry_run=False):
    """Yield one Languoid for every language line of an LFF-style file.

    Lines are right-stripped; lines starting with ``#`` and blank lines are
    skipped. Each remaining line is handled by the first rule that applies:

    * a language line (leading whitespace, name and id, then a bracketed
      three-letter code, a ``NOCODE_...`` code or empty brackets) yields
      ``Languoid.from_lff(...)`` for the current classification;
    * a line ending in ``[-isolate-]`` makes the following languages
      isolates (they are created with a classification of ``None``);
    * any other line must be a classification line (one or more
      comma-separated name-and-id items) and becomes the classification
      for the language lines that follow.

    :param level: a ``Level`` member; the first letter of its name selects \
    the default file ``<letter>ff.txt`` (located via ``build_path``).
    :param fp: optional open file-like object to read from instead of the \
    default file; it is used as a context manager, i.e. closed afterwards.
    :param dry_run: passed through to ``Languoid.from_lff``.
    :raises ValueError: for a line matching none of the three patterns.
    :raises AssertionError: if ``level`` is not a ``Level`` or a language \
    line appears before any classification line.
    """
    assert isinstance(level, Level)
    # Raw strings: the backslash sequences below are regex escapes, not
    # string escapes; non-raw literals trigger invalid-escape warnings.
    lang_line = re.compile(
        r'\s+' + NAME_AND_ID_REGEX + r'(\[([a-z]{3}|NOCODE\_[^\]]+)?\])$')
    class_line = re.compile(
        NAME_AND_ID_REGEX + r'(,\s*' + NAME_AND_ID_REGEX + r')*$')
    isolate_line = re.compile(r'([^\[]+)(\[-isolate-\])$')

    path = None
    with fp or build_path(
            '%sff.txt' % level.name[0]).open(encoding='utf8') as fp:
        for line in fp:
            line = line.rstrip()
            if line.startswith('#') or not line.strip():
                # ignore comments or empty lines
                continue
            if lang_line.match(line):
                # A language needs a preceding classification/isolate line.
                assert path
                yield Languoid.from_lff(
                    None if path == 'isolate' else path,
                    line.strip(),
                    level,
                    dry_run=dry_run)
            elif isolate_line.match(line):
                path = 'isolate'
            elif class_line.match(line):
                path = line.strip()
            else:
                # Neither language, isolate marker nor classification line.
                raise ValueError(line)
def lang2tree(lang, lineage, out, old_tree):
    """Create the directory chain for ``lang`` below ``out`` and write languoid info.

    :param lang: languoid to place; its ``fname()`` names the leaf directory.
    :param lineage: iterable of ``(name, id, level)`` triples, outermost \
    group first; each id becomes one directory level.
    :param out: directory (path object) under which the chain is created.
    :param old_tree: mapping from id to an already known languoid. Known \
    languoids are reused (with name, and for ``lang`` also iso, updated to \
    the new values) instead of the freshly created objects.
    :raises AssertionError: if a known languoid's level differs from the \
    level requested now.
    """
    current_dir = out
    for name, id_, level in lineage:
        current_dir = current_dir.joinpath(id_)
        if current_dir.exists():
            # Directory is already there: nothing is (re)written for it.
            continue
        current_dir.mkdir()
        if id_ in old_tree:
            node = old_tree[id_]
            assert node.level == level
            if name != node.name:
                # rename a subgroup!
                node.name = name
        else:
            node = Languoid.from_name_id_level(name, id_, level)
        node.write_info(current_dir)

    leaf_dir = current_dir.joinpath(lang.fname())
    leaf_dir.mkdir()
    if lang.id not in old_tree:
        lang.write_info(leaf_dir)
        return

    # Known language: keep the old object, but sync name and iso from lang.
    known = old_tree[lang.id]
    assert known.level == lang.level
    if known.name != lang.name:
        known.name = lang.name
    if known.iso != lang.iso:
        known.iso = lang.iso
    known.write_info(leaf_dir)
def test_isolate(self):
    """A languoid loaded from the ``isol1234`` fixture is an isolate without parent or family."""
    from pyglottolog.languoids import Languoid

    isolate_dir = self.tree.joinpath('isol1234')
    isolate = Languoid.from_dir(isolate_dir)
    self.assertTrue(isolate.isolate)
    self.assertIsNone(isolate.parent)
    self.assertIsNone(isolate.family)
def test_factory(self):
    """Load a family and one of its languages via the API fixture tree.

    Checks the values read from disk and the string representations, then
    reassigns the writable attributes, verifies the parent/child links,
    writes the languoid info and finally checks metadata accessors
    (ISO retirement, endangerment, names, identifier).
    """
    family = Languoid.from_dir(self.api.tree.joinpath('abcd1234'))
    self.assertEqual(family.category, 'Family')

    lang_dir = self.api.tree.joinpath(family.id, 'abcd1235')
    lang = Languoid.from_dir(lang_dir)
    self.assertEqual(lang.name, 'language')
    self.assertIn('abcd1235', repr(lang))
    self.assertIn('language', '%s' % lang)
    self.assertEqual(lang.level, Level.language)

    # Coordinates as stored in the fixture, then after reassignment.
    self.assertAlmostEqual(lang.latitude, 0.5)
    self.assertAlmostEqual(lang.longitude, 0.5)
    lang.latitude = 1.0
    lang.longitude = 1.0
    self.assertAlmostEqual(lang.latitude, 1.0)
    self.assertAlmostEqual(lang.longitude, 1.0)

    # A value assigned through ``iso_code`` is visible through ``iso``.
    self.assertEqual(lang.iso_code, 'abc')
    lang.iso_code = 'cde'
    self.assertEqual(lang.iso, 'cde')

    # ``hid`` still holds 'abc' after the ISO code was changed.
    self.assertEqual(lang.hid, 'abc')
    lang.hid = 'abo'
    self.assertEqual(lang.hid, 'abo')
    self.assertEqual(lang.id, 'abcd1235')

    self.assertEqual(len(lang.macroareas), 2)
    lang.macroareas = [Macroarea.africa]
    self.assertEqual(lang.macroareas, [Macroarea.africa])
    lang.countries = self.api.countries[:2]
    self.assertEqual(len(lang.countries), 2)

    # Tree navigation in both directions.
    self.assertEqual(lang.parent, family)
    first_child = family.children[0]
    self.assertEqual(first_child, lang)
    self.assertEqual(lang.children[0].family, family)

    # Writing must create an entry named after the glottocode.
    out_dir = self.tmp_path()
    lang.write_info(out_dir.as_posix())
    self.assertTrue(self.tmp_path('abcd1235').exists())

    retirement = self.api.languoid('abcd1235').iso_retirement
    self.assertIsInstance(retirement.asdict(), dict)
    self.assertIsNone(lang.classification_comment)

    # The endangerment setter accepts a description string.
    lang.endangerment = 'Critically endangered'
    self.assertEqual(lang.endangerment, EndangermentStatus.critical)

    # ``names`` and ``identifier`` reflect the corresponding cfg sections.
    self.assertEqual(lang.names, {})
    lang.cfg['altnames'] = {'glottolog': 'xyz'}
    self.assertIn('glottolog', lang.names)
    self.assertEqual(lang.identifier, {})
    lang.cfg['identifier'] = {'multitree': 'xyz'}
    self.assertIn('multitree', lang.identifier)
def test_factory(tmpdir, api_copy):
    """Load a family and one of its languages from the copied API tree.

    Checks the values read from disk and the string representations, then
    reassigns the writable attributes, verifies the parent/child links,
    writes the languoid info into ``tmpdir`` and finally checks metadata
    accessors (ISO retirement, endangerment, names, identifier).
    """
    family = Languoid.from_dir(api_copy.tree / 'abcd1234')
    assert family.category == 'Family'

    lang_dir = api_copy.tree / family.id / 'abcd1235'
    lang = Languoid.from_dir(lang_dir)
    assert lang.name == 'language'
    assert 'abcd1235' in repr(lang)
    assert 'language' in '%s' % lang
    assert lang.level == Level.language

    # Coordinates as stored in the fixture, then after reassignment.
    assert lang.latitude == pytest.approx(0.5)
    assert lang.longitude == pytest.approx(0.5)
    lang.latitude = 1.0
    lang.longitude = 1.0
    assert lang.latitude == pytest.approx(1.0)
    assert lang.longitude == pytest.approx(1.0)

    # A value assigned through ``iso_code`` is visible through ``iso``.
    assert lang.iso_code == 'abc'
    lang.iso_code = 'cde'
    assert lang.iso == 'cde'

    # ``hid`` still holds 'abc' after the ISO code was changed.
    assert lang.hid == 'abc'
    lang.hid = 'abo'
    assert lang.hid == 'abo'
    assert lang.id == 'abcd1235'

    assert len(lang.macroareas) == 2
    lang.macroareas = [Macroarea.africa]
    assert lang.macroareas == [Macroarea.africa]
    lang.countries = api_copy.countries[:2]
    assert len(lang.countries) == 2

    # Tree navigation in both directions.
    assert lang.parent == family
    first_child = family.children[0]
    assert first_child == lang
    assert lang.children[0].family == family

    # Writing must create an entry named after the glottocode.
    lang.write_info(str(tmpdir))
    assert (tmpdir / 'abcd1235').exists()

    retirement = api_copy.languoid('abcd1235').iso_retirement
    assert isinstance(retirement.asdict(), dict)
    assert lang.classification_comment is None

    # The endangerment setter accepts a description string.
    lang.endangerment = 'nearly extinct'
    assert lang.endangerment == EndangermentStatus.critical

    # ``names`` and ``identifier`` reflect the corresponding cfg sections.
    assert lang.names == {}
    lang.cfg['altnames'] = {'glottolog': 'xyz'}
    assert 'glottolog' in lang.names
    assert lang.identifier == {}
    lang.cfg['identifier'] = {'multitree': 'xyz'}
    assert 'multitree' in lang.identifier
def test_attrs(api):
    """``name`` is writable; assigning ``glottocode`` or ``id`` raises AttributeError."""
    lang = Languoid.from_name_id_level(
        api.tree, 'name', 'abcd1235', 'language', hid='NOCODE')

    lang.name = 'other'
    assert lang.name == 'other'

    # Both spellings of the identifier must reject assignment.
    for read_only in ('glottocode', 'id'):
        with pytest.raises(AttributeError):
            setattr(lang, read_only, 'x')

    assert lang.id == lang.glottocode
    assert lang.hid == 'NOCODE'
def test_attrs(self):
    """``name`` is writable; assigning ``glottocode`` or ``id`` raises ValueError."""
    from pyglottolog.languoids import Languoid, Level

    lang = Languoid.from_name_id_level(
        'name', 'abcd1235', Level.language, hid='NOCODE')

    lang.name = 'other'
    self.assertEqual(lang.name, 'other')

    # Both spellings of the identifier must reject assignment.
    for read_only in ('glottocode', 'id'):
        with self.assertRaises(ValueError):
            setattr(lang, read_only, 'x')

    self.assertEqual(lang.id, lang.glottocode)
    self.assertEqual(lang.hid, 'NOCODE')
def run(args):
    """Create a new languoid below ``args.parent`` and print where its info was written.

    ``args.parent`` is either a string matching ``Glottocode.pattern`` (then
    it is replaced by the ``dir`` of the languoid returned by
    ``get_languoid``) or a filesystem path (then it is replaced by the
    corresponding ``pathlib.Path``, which must exist).

    Every item of ``args.props`` must look like ``key=value``. Only the
    first ``=`` separates key and value, so values may contain ``=``.

    :raises ParserError: if ``args.parent`` is a path that does not exist.
    :raises ValueError: if an item of ``args.props`` contains no ``=``.
    """
    if Glottocode.pattern.match(args.parent):
        args.parent = get_languoid(args, args.parent).dir
    else:
        args.parent = pathlib.Path(args.parent)
        if not args.parent.exists():
            raise ParserError('invalid parent dir specified')

    lang = Languoid.from_name_id_level(
        args.parent,
        args.name,
        args.repos.glottocodes.new(args.name),
        args.level,
        # maxsplit=1: a value such as "a=b" must not yield a 3-item
        # sequence, which dict() would reject.
        **dict(prop.split('=', 1) for prop in args.props))
    print("Info written to %s" % lang.write_info(outdir=args.parent))
def test_lff2tree(self):
    # Round-trip test: LFF text -> tree (lff2tree) -> LFF text (tree2lff),
    # followed by a rename scenario.
    old, new = self.tmp_path('old'), self.tmp_path('new')
    old.mkdir()
    new.mkdir()
    # NOTE(review): in this view both fixtures sit on a single line. They are
    # compared verbatim with the files written by tree2lff below, so their
    # original line breaks/indentation presumably got lost - restore them
    # from version control before relying on this test.
    # Language-level fixture.
    _l = """# -*- coding: utf-8 -*- Abkhaz-Adyge [abkh1242] Ubykh [ubyk1235][uby] Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243] Abaza [abaz1241][abq] Abkhazian [abkh1244][abk] Abkhaz-Adyge [abkh1242], Circassian [circ1239] Adyghe [adyg1241][ady] Kabardian [kaba1278][kbd] """
    # Dialect-level fixture.
    _d = """# -*- coding: utf-8 -*- Abaza [abaz1241] Ashkaraua [ashk1247][] Bezshagh [bezs1238][] Tapanta [tapa1256][] Abkhazian [abkh1244] Abzhui [abzh1238][] Bzyb [bzyb1238][] Samurzakan [samu1242][] """

    def lffs():
        # Fresh mapping per call, so each lff2tree run gets its own lff() results.
        return {Level.language: lff(_l), Level.dialect: lff(_d)}

    # First run with `old` only, second run with `old` and `new`.
    # NOTE(review): presumably `old` is the existing tree and `new` the
    # output location - confirm against lff2tree's signature.
    lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
    lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
    # Serialize `new` again, one output file per level.
    tree2lff(
        new,
        out_paths={
            Level.language: self.tmp_path('lff'),
            Level.dialect: self.tmp_path('dff')
        }
    )
    # The written files must equal the fixture text exactly.
    with self.tmp_path('lff').open() as fp:
        self.assertEqual(fp.read(), _l)
    with self.tmp_path('dff').open() as fp:
        self.assertEqual(fp.read(), _d)
    # Rename scenario: the replacement affects the language name 'Abaza' as
    # well as the group name 'Abkhaz-Abaza' (which contains 'Abaza').
    lffs_ = {Level.language: lff(_l.replace('Abaza', 'Abazul')), Level.dialect: lff(_d)}
    lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
    l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
    # Both the language and its parent group must carry the new names.
    self.assertEqual(l.name, 'Abazul')
    self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
def test_lff2tree(self):
    # Round-trip test: LFF text -> tree (lff2tree) -> LFF text (tree2lff),
    # followed by a rename scenario.
    old, new = self.tmp_path('old'), self.tmp_path('new')
    old.mkdir()
    new.mkdir()
    # NOTE(review): in this view both fixtures sit on a single line. They are
    # compared verbatim with the files written by tree2lff below, so their
    # original line breaks/indentation presumably got lost - restore them
    # from version control before relying on this test.
    # Language-level fixture.
    _l = """# -*- coding: utf-8 -*- Abkhaz-Adyge [abkh1242] Ubykh [ubyk1235][uby] Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243] Abaza [abaz1241][abq] Abkhazian [abkh1244][abk] Abkhaz-Adyge [abkh1242], Circassian [circ1239] Adyghe [adyg1241][ady] Kabardian [kaba1278][kbd] """
    # Dialect-level fixture.
    _d = """# -*- coding: utf-8 -*- Abaza [abaz1241] Ashkaraua [ashk1247][] Bezshagh [bezs1238][] Tapanta [tapa1256][] Abkhazian [abkh1244] Abzhui [abzh1238][] Bzyb [bzyb1238][] Samurzakan [samu1242][] """

    def lffs():
        # Fresh mapping per call, so each lff2tree run gets its own lff() results.
        return {Level.language: lff(_l), Level.dialect: lff(_d)}

    # First run with `old` only, second run with `old` and `new`.
    # NOTE(review): presumably `old` is the existing tree and `new` the
    # output location - confirm against lff2tree's signature.
    lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
    lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
    # Serialize `new` again, one output file per level.
    tree2lff(new, out_paths={
        Level.language: self.tmp_path('lff'),
        Level.dialect: self.tmp_path('dff')
    })
    # The written files must equal the fixture text exactly.
    with self.tmp_path('lff').open() as fp:
        self.assertEqual(fp.read(), _l)
    with self.tmp_path('dff').open() as fp:
        self.assertEqual(fp.read(), _d)
    # Rename scenario: the replacement affects the language name 'Abaza' as
    # well as the group name 'Abkhaz-Abaza' (which contains 'Abaza').
    lffs_ = {
        Level.language: lff(_l.replace('Abaza', 'Abazul')),
        Level.dialect: lff(_d)
    }
    lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
    l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
    # Both the language and its parent group must carry the new names.
    self.assertEqual(l.name, 'Abazul')
    self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
def create(args):
    """Create a new languoid directory for a languoid specified by name and level.

    glottolog create <parent> <name> <level>

    <level> must be one of family, language or dialect. If no languoid is
    found for <parent>, the new languoid is placed directly in the
    repository tree. Further arguments of the form key=value are passed on
    as properties of the new languoid; only the first "=" separates key and
    value, so values may contain "=".
    """
    assert args.args[2] in ['family', 'language', 'dialect']

    parent = args.repos.languoid(args.args[0]) or None
    outdir = parent.dir if parent else args.repos.tree
    lang = Languoid.from_name_id_level(
        outdir,
        args.args[1],
        args.repos.glottocodes.new(args.args[1]),
        getattr(Level, args.args[2]),
        # maxsplit=1: a value such as "a=b" must not yield a 3-item
        # sequence, which dict() would reject.
        **dict(prop.split('=', 1) for prop in args.args[3:]))
    print("Info written to %s" % lang.write_info(outdir=outdir))
def test_attrs(self):
    """``name`` is writable; assigning ``glottocode`` or ``id`` raises AttributeError."""
    lang = Languoid.from_name_id_level(
        self.api.tree, 'name', 'abcd1235', Level.language, hid='NOCODE')

    lang.name = 'other'
    self.assertEqual(lang.name, 'other')

    # Both spellings of the identifier must reject assignment.
    for read_only in ('glottocode', 'id'):
        with self.assertRaises(AttributeError):
            setattr(lang, read_only, 'x')

    self.assertEqual(lang.id, lang.glottocode)
    self.assertEqual(lang.hid, 'NOCODE')
def lang2tree(api, log, lang, lineage, out, old_tree):
    """Create the directory chain for ``lang`` below ``out`` and write languoid info.

    :param api: object providing ``iso`` (membership test for ISO codes) \
    and ``tree`` (passed to ``Languoid.from_name_id_level`` for new groups).
    :param log: logger; level changes of known languoids are reported via \
    ``log.info``.
    :param lang: languoid to place; its ``id`` names the leaf directory.
    :param lineage: iterable of ``(name, id, level)`` or \
    ``(name, id, level, hid)`` tuples, outermost group first; each id \
    becomes one directory level.
    :param out: directory (path object) under which the chain is created.
    :param old_tree: mapping from id to an already known languoid. Known \
    languoids are reused, with level, name (and for ``lang`` also iso and \
    hid) updated to the new values.
    """
    current_dir = out
    for spec in lineage:
        name, id_, level = spec[:3]
        # -1 marks "no hid given"; None is a meaningful hid value.
        hid = spec[3] if len(spec) == 4 else -1

        current_dir = current_dir.joinpath(id_)
        if current_dir.exists():
            # Directory is already there: nothing is (re)written for it.
            continue
        current_dir.mkdir()

        if id_ in old_tree:
            node = old_tree[id_]
            if node.level != level:
                log.info('{0} from {1} to {2}'.format(node, node.level, level))
                node.level = level
            if name != node.name:
                # rename a subgroup!
                node.name = name
        else:
            node = Languoid.from_name_id_level(api.tree, name, id_, level)

        if hid != -1:
            # iso is only taken over for known ISO codes (or reset by None);
            # hid is taken over as given.
            if (hid in api.iso or hid is None) and node.iso != hid:
                node.iso = hid
            if hid != node.hid:
                node.hid = hid
        node.write_info(current_dir)

    leaf_dir = current_dir.joinpath(lang.id)
    leaf_dir.mkdir()
    if lang.id not in old_tree:
        lang.write_info(leaf_dir)
        return

    # Known language: keep the old object, but sync it with lang.
    known = old_tree[lang.id]
    if known.level != lang.level:
        log.info('{0} from {1} to {2}'.format(known, known.level, lang.level))
        known.level = lang.level
    if known.name != lang.name:
        known.name = lang.name
    if known.iso != lang.iso:
        known.iso = lang.iso
    if lang.hid and known.hid != lang.hid:
        known.hid = lang.hid
    known.write_info(leaf_dir)
def new_languoid(args):
    """Create a new languoid directory for a languoid specified by name and level.

    glottolog new_languoid <name> <level>

    <level> must be one of family, language or dialect. Further arguments
    of the form key=value are passed on as properties of the new languoid;
    only the first "=" separates key and value, so values may contain "=".
    """
    assert args.args[1] in ['family', 'language', 'dialect']

    name = args.args[0]
    lang = Languoid.from_name_id_level(
        name,
        Glottocode.from_name(name),
        args.args[1],
        # maxsplit=1: a value such as "a=b" must not yield a 3-item
        # sequence, which dict() would reject.
        **dict(prop.split('=', 1) for prop in args.args[2:]))
    #
    # FIXME: how to specify parent? Just mv there?
    #
    print("Info written to %s" % lang.write_info())
def languoid(api, log, new, path, lname, glottocode, isocode, level):
    """Build a Languoid plus its lineage from the parsed parts of an LFF entry.

    :param api: object providing ``glottocodes.new(name)``, ``iso`` \
    (membership test for ISO codes) and ``tree``.
    :param log: logger used to report an invalid isolate classification.
    :param new: dict caching glottocodes minted during this run, keyed by \
    ``(name, level)``; it is read and extended here.
    :param path: sequence of ``(name, id, hid)`` triples describing the \
    classification, or a falsy value for none.
    :param lname: name of the languoid.
    :param glottocode: glottocode of the languoid; looked up in ``new`` or \
    minted if falsy.
    :param isocode: code assigned to ``hid``, and to ``iso`` as well if it \
    is in ``api.iso`` or is ``None``.
    :param level: level of the languoid.
    :return: pair ``(languoid, lineage)`` where lineage is a list of \
    ``(name, id, level, hid)`` tuples.
    :raises ValueError: if the isolate marker is not the only path item.
    """
    if not glottocode:
        glottocode = new.get((lname, level))
    if not glottocode:
        # Not seen in this run either: mint a code and remember it.
        glottocode = api.glottocodes.new(lname)
        new[lname, level] = glottocode

    lineage = []
    if path:
        for index, (name, id_, hid) in enumerate(path):
            if id_ == ISOLATE_ID:
                # The isolate marker is only valid as the sole path item.
                if not (index == 0 and len(path) == 1):
                    log.error(
                        'invalid classification line for languoid: {0} [{1}]'.
                        format(lname, glottocode))
                    raise ValueError('invalid isolate line')
                break

            # Ancestors are families, except for dialects: there the first
            # path item is the language and all further items are dialects.
            if level == Level.dialect:
                group_level = Level.language if index == 0 else Level.dialect
            else:
                group_level = Level.family

            if not id_:
                id_ = new.get((name, group_level))
            if not id_:
                id_ = api.glottocodes.new(name)
                new[name, group_level] = id_

            lineage.append((name, id_, group_level, hid))

    # The Languoid itself only gets (name, id, level) for each ancestor.
    lang = Languoid.from_name_id_level(
        api.tree,
        lname,
        glottocode,
        level,
        lineage=[entry[:3] for entry in lineage])
    if (isocode in api.iso) or (isocode is None):
        lang.iso = isocode
    lang.hid = isocode
    return lang, lineage
def test_isolate(api):
    """A languoid loaded from the ``isol1234`` fixture is an isolate without parent or family."""
    isolate_dir = api.tree / 'isol1234'
    isolate = Languoid.from_dir(isolate_dir)
    assert isolate.isolate
    assert isolate.parent is None
    assert isolate.family is None
def test_isolate(self):
    """A languoid loaded from the ``isol1234`` fixture is an isolate without parent or family."""
    isolate_dir = self.api.tree.joinpath('isol1234')
    isolate = Languoid.from_dir(isolate_dir)
    self.assertTrue(isolate.isolate)
    self.assertIsNone(isolate.parent)
    self.assertIsNone(isolate.family)
def test_factory_without_api(api_copy):
    """A languoid created by ``from_dir`` without ``_api`` reports no macroareas."""
    family = Languoid.from_dir(api_copy.tree / 'abcd1234', _api=api_copy)
    lang_dir = api_copy.tree / family.id / 'abcd1235'
    lang = Languoid.from_dir(lang_dir)
    macroarea_count = len(lang.macroareas)
    # No API passed at initialization => no macroareas!
    assert macroarea_count == 0