def lff2tree(args):
    """Recreate tree from lff.txt and dff.txt

    glottolog lff2tree [test]
    """
    # The docstring above is kept verbatim (NOTE(review): it may be surfaced
    # as CLI help text - confirm before rewording).
    #
    # Flow:
    #   1. Delegate the rebuild to ``lff.lff2tree(args.repos, args.log)``.
    #   2. If that raises ``ValueError``, print roll-back instructions and
    #      re-raise the same exception.
    #   3. Otherwise print follow-up instructions; the short variant is used
    #      when the first positional argument is ``test``.
    try:
        lff.lff2tree(args.repos, args.log)
    except ValueError:  # pragma: no cover
        rollback_hint = """
Something went wrong! Roll back inconsistent state running

    rm -rf languoids
    git checkout languoids
"""
        print(rollback_hint)
        raise

    positional = args.args
    if positional and positional[0] == 'test':  # pragma: no cover
        print("""
You can run

    diff -rbB build/tree/ languoids/tree/

to inspect the changes in the directory tree.
""")
        return

    workflow_hint = """
Run

    git status

to inspect changes in the directory tree.
You can run

    diff -rbB build/tree/ languoids/tree/

to inspect the changes in detail.

- To discard changes run

    git checkout languoids/tree

- To commit and push changes, run

    git add -A languoids/tree/...

  for any newly created nodes listed under

# Untracked files:
#   (use "git add <file>..." to include in what will be committed)
#
#       languoids/tree/...

  followed by

    git commit -a -m"reason for change of classification"
    git push origin
"""
    print(workflow_hint)
def test_lff2tree_update_iso_code(api_copy):
    """Regression test: lff2tree re-adds an ISO code missing from md.ini.

    See https://github.com/glottolog/pyglottolog/issues/47

    The ``iso`` line is stripped from the ``md.ini`` of ``abcd1234``; an
    ``lff.txt`` that lists ``aaa`` for that node is then written and the tree
    is rebuilt. Afterwards the languoid must report ``aaa`` as its ISO code.
    """
    md = api_copy.path('languoids', 'tree', 'abcd1234', 'md.ini')
    # We remove the iso code from the languoid
    text = '\n'.join(
        line for line in md.read_text(encoding='utf8').split('\n')
        if not line.startswith('iso'))
    md.write_text(text, encoding='utf8')
    # Now the following lff should reset hid **and** iso code:
    # (the value returned by _set_lff is not needed here, so it is not bound)
    _set_lff(api_copy, 'lff.txt', """# -*- coding: utf-8 -*-
Name [abcd1234] aaa
    Other [abcd1235] abk
""")
    # An empty dff.txt: no dialect data is supplied for this scenario.
    _set_lff(api_copy, 'dff.txt', "")
    lff2tree(api_copy)
    assert api_copy.languoid('abcd1234').iso == 'aaa'
def lff2tree(args):
    """Recreate tree from lff.txt and dff.txt

    glottolog lff2tree [test]
    """
    # The docstring above is kept verbatim (NOTE(review): it may be surfaced
    # as CLI help text - confirm before rewording).
    #
    # The rebuild is delegated to ``lff.lff2tree()``; afterwards exactly one
    # of two hints is printed, depending on whether the first positional
    # argument is ``test``.
    lff.lff2tree()

    test_hint = """
You can run

    diff -rbB build/tree/ languoids/tree/

to inspect the changes in the directory tree.
"""
    workflow_hint = """
Run

    git status

to inspect changes in the directory tree.
You can run

    diff -rbB build/tree/ languoids/tree/

to inspect the changes in detail.

- To discard changes run

    git checkout languoids/tree

- To commit and push changes, run

    git add languoids/tree/...

  for any newly created nodes listed under

# Untracked files:
#   (use "git add <file>..." to include in what will be committed)
#
#       languoids/tree/...

  followed by

    git commit -a -m"reason for change of classification"
    git push origin
"""
    test_requested = args.args and args.args[0] == 'test'
    print(test_hint if test_requested else workflow_hint)
def test_lff2tree(self):
    """Round-trip lff/dff fixtures through lff2tree and tree2lff.

    Builds a tree from the in-memory fixtures, dumps it back with
    ``tree2lff`` and checks that the dumped text equals the fixtures; then
    rebuilds with a renamed node and checks the new names are picked up.
    """
    old, new = self.tmp_path('old'), self.tmp_path('new')
    old.mkdir()
    new.mkdir()

    # Language-level fixture: unindented lines are classification paths,
    # indented lines are the languages listed under that path.
    _l = """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235][uby]
Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241][abq]
    Abkhazian [abkh1244][abk]
Abkhaz-Adyge [abkh1242], Circassian [circ1239]
    Adyghe [adyg1241][ady]
    Kabardian [kaba1278][kbd]
"""

    # Dialect-level fixture, same layout.
    _d = """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Ashkaraua [ashk1247][]
    Bezshagh [bezs1238][]
    Tapanta [tapa1256][]
Abkhazian [abkh1244]
    Abzhui [abzh1238][]
    Bzyb [bzyb1238][]
    Samurzakan [samu1242][]
"""

    def lffs():
        # A fresh mapping per call, so each lff2tree run gets its own
        # lff(...) objects.
        return {Level.language: lff(_l), Level.dialect: lff(_d)}

    # First run is given only `old`; second run is given both `old` and `new`.
    # NOTE(review): presumably the first call populates `old` and the second
    # writes into `new` - confirm against lff2tree's signature.
    lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
    lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
    tree2lff(
        new,
        out_paths={
            Level.language: self.tmp_path('lff'),
            Level.dialect: self.tmp_path('dff')
        }
    )
    # The dumped files must reproduce the fixtures exactly.
    with self.tmp_path('lff').open() as fp:
        self.assertEqual(fp.read(), _l)
    with self.tmp_path('dff').open() as fp:
        self.assertEqual(fp.read(), _d)

    # str.replace touches every occurrence, so 'Abkhaz-Abaza' becomes
    # 'Abkhaz-Abazul' as well - hence the parent-name assertion below.
    lffs_ = {Level.language: lff(_l.replace('Abaza', 'Abazul')), Level.dialect: lff(_d)}
    lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
    l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
    self.assertEqual(l.name, 'Abazul')
    self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
def test_lff2tree(self):
    """Check that lff2tree followed by tree2lff reproduces its input.

    Two temporary directories (``old`` and ``new``) are created, the tree is
    built from in-memory lff/dff fixtures, written back out with
    ``tree2lff`` and compared with the fixtures. A final rebuild with
    ``'Abaza'`` replaced by ``'Abazul'`` verifies that renamed nodes are
    reflected in the resulting ``Languoid`` objects.
    """
    old, new = self.tmp_path('old'), self.tmp_path('new')
    old.mkdir()
    new.mkdir()

    # Languages grouped under their classification path.
    _l = """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235][uby]
Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241][abq]
    Abkhazian [abkh1244][abk]
Abkhaz-Adyge [abkh1242], Circassian [circ1239]
    Adyghe [adyg1241][ady]
    Kabardian [kaba1278][kbd]
"""

    # Dialects grouped under their language.
    _d = """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Ashkaraua [ashk1247][]
    Bezshagh [bezs1238][]
    Tapanta [tapa1256][]
Abkhazian [abkh1244]
    Abzhui [abzh1238][]
    Bzyb [bzyb1238][]
    Samurzakan [samu1242][]
"""

    def lffs():
        # Called once per lff2tree run so the lff(...) wrappers are not
        # shared between runs.
        return {Level.language: lff(_l), Level.dialect: lff(_d)}

    lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
    lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
    tree2lff(new, out_paths={
        Level.language: self.tmp_path('lff'),
        Level.dialect: self.tmp_path('dff')
    })
    # Round trip: what tree2lff wrote must equal what lff2tree was fed.
    with self.tmp_path('lff').open() as fp:
        self.assertEqual(fp.read(), _l)
    with self.tmp_path('dff').open() as fp:
        self.assertEqual(fp.read(), _d)

    # The replacement also rewrites the family name 'Abkhaz-Abaza', which is
    # why the parent is expected to be called 'Abkhaz-Abazul'.
    lffs_ = {
        Level.language: lff(_l.replace('Abaza', 'Abazul')),
        Level.dialect: lff(_d)
    }
    lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
    l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
    self.assertEqual(l.name, 'Abazul')
    self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
def test_lff2tree(api_copy):
    """Drive lff2tree through a sequence of lff.txt / dff.txt scenarios.

    The steps are order-dependent: every ``lff2tree(api_copy)`` call works on
    the repository state left behind by the previous step. The first half
    checks successful rebuilds (levels, ISO codes, hids, renames, newly
    created nodes); the second half checks that malformed input raises
    ``ValueError`` with a matching message.
    """
    # Baseline classification. The return value is reused below via
    # ``.replace`` (presumably it is the text that was written - verify
    # against _set_lff).
    lfftext = _set_lff(
        api_copy,
        'lff.txt',
        """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242] aaa
    Ubykh [ubyk1235]uby
Abkhaz-Adyge [abkh1242] aaa; Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241]abq
    Abkhazian [abkh1244]abk
Abkhaz-Adyge [abkh1242] aaa; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]kbd
""")

    _set_lff(
        api_copy,
        'dff.txt',
        """# -*- coding: utf-8 -*-
Abaza [abaz1241] abq
    Ashkaraua [ashk1247]
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244] abk
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
""")

    lff2tree(api_copy)
    assert api_copy.languoid('abkh1242').iso == 'aaa'
    assert api_copy.languoid(
        'ashk1247').level == api_copy.languoid_levels.dialect
    assert api_copy.languoid(
        'abaz1241').level == api_copy.languoid_levels.language
    assert api_copy.languoid('abaz1241').hid == 'abq'

    # Rename a family and rebuild; no glottocode directory may be duplicated.
    _set_lff(api_copy, 'lff.txt', lfftext.replace('Abkhaz-Abaza', 'Abkhaz-Abazzza'))
    lff2tree(api_copy)
    glottocodes = [d.name for d in walk(api_copy.tree, mode='dirs')]
    assert len(glottocodes) == len(set(glottocodes))
    abkh1243 = api_copy.languoid('abkh1243')
    # Make sure the new name is picked up ...
    assert abkh1243.name == 'Abkhaz-Abazzza'
    # ... and the old one retained as alternative name:
    assert 'Abkhaz-Abaza' in abkh1243.names['glottolog']

    # Restructure: abaz1241 becomes a node on a classification path,
    # ashk1247 moves to the language file, and nodes without a glottocode
    # ("[]") are introduced.
    lfftext = _set_lff(
        api_copy,
        'lff.txt',
        """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235]
Abkhaz-Adyge [abkh1242]; Abkhaz-Abaza [abkh1243]; Abaza [abaz1241]
    Ashkaraua [ashk1247]xyz
    Abkhazian [abkh1244]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]; New Group []
    New name []NOCODE_New-name
    Another one []
""")

    _set_lff(
        api_copy,
        'dff.txt',
        """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244]
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
Kabardian [kaba1278]
    Dia []aaa
""")

    lff2tree(api_copy)
    assert api_copy.languoid(
        'abaz1241').level == api_copy.languoid_levels.family
    # Now we test two things:
    # - aaa has been removed as ISO code from abkh1242
    # - aaa has been attached as ISO code to a newly created language
    assert api_copy.languoid('aaa').name == 'Dia'
    langs = list(api_copy.languoids())
    assert 'newg1234' in api_copy.glottocodes
    assert sum(1 for l in langs if l.name == 'New Group') == 1
    assert sum(1 for l in langs if l.hid == 'NOCODE_New-name') == 1

    # Test ISO code removal:
    _set_lff(api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
Kabardian [kaba1278]
    Dia []
""")
    lff2tree(api_copy)
    assert api_copy.languoid('aaa') is None

    # Only checks that the reverse conversion runs on the current tree;
    # nothing is asserted about its output here.
    tree2lff(api_copy)

    # Test hid adding
    _set_lff(
        api_copy,
        'dff.txt',
        """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkarauax [bezs1238]NOCODE_abc
""")
    lff2tree(api_copy)
    assert api_copy.languoid('bezs1238').hid == 'NOCODE_abc'

    #
    # Nodes must have unique names!
    #
    _set_lff(
        api_copy,
        'dff.txt',
        """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkaraua [bezs1238]
""")
    with pytest.raises(ValueError, match=r'duplicate'):
        lff2tree(api_copy)

    #
    # Nodes must have consistent names!
    #
    _set_lff(
        api_copy,
        'dff.txt',
        """# -*- coding: utf-8 -*-
Ashkxxxaraua [ashk1247]xyz
    Bezshagh [bezs1238]
""")
    with pytest.raises(ValueError, match=r'inconsistent'):
        lff2tree(api_copy)

    #
    # Top-level nodes in dff must be languages:
    #
    _set_lff(
        api_copy,
        'dff.txt',
        """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Bezshagh [bezs1238]
""")
    with pytest.raises(ValueError, match=r'inconsistent'):
        lff2tree(api_copy)

    #
    # Top-level nodes in dff must be languages in lff:
    #
    _set_lff(api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
None [xyzz1234]
    Dia []
""")
    with pytest.raises(ValueError, match=r'invalid'):
        lff2tree(api_copy)

    #
    # Isolates must not have multiple ancestors:
    #
    _set_lff(
        api_copy,
        'dff.txt',
        """# -*- coding: utf-8 -*-
None [xyzz1234]; Other [-isolate-]
    Dia []
""")
    with pytest.raises(ValueError, match=r'isolate'):
        lff2tree(api_copy)

    #
    # Languages must appear after a classification line:
    #
    _set_lff(api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
    Dia []
""")
    with pytest.raises(ValueError, match=r'classification'):
        lff2tree(api_copy)
def test_lff2tree(self):
    """Run lff2tree against successive lff.txt / dff.txt fixtures.

    Each step rewrites one or both fixture files through ``self._set_lff``
    and rebuilds the tree of ``self.api``; later steps rely on the state
    produced by earlier ones. Successful rebuilds are checked first,
    followed by inputs that must be rejected with ``ValueError``.

    NOTE(review): ``assertRaisesRegexp`` is a deprecated alias of
    ``assertRaisesRegex`` and was removed in Python 3.12 - confirm the
    supported Python versions before renaming it.
    """
    # Baseline classification; the returned value is reused below via
    # ``.replace`` (presumably the written text - verify against _set_lff).
    lfftext = self._set_lff(
        """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242] aaa
    Ubykh [ubyk1235]uby
Abkhaz-Adyge [abkh1242] aaa; Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241]abq
    Abkhazian [abkh1244]abk
Abkhaz-Adyge [abkh1242] aaa; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]kbd
""",
        'lff.txt')

    self._set_lff(
        """# -*- coding: utf-8 -*-
Abaza [abaz1241] abq
    Ashkaraua [ashk1247]
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244] abk
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
""",
        'dff.txt')

    lff2tree(self.api)
    self.assertEqual(self.api.languoid('abkh1242').iso, 'aaa')
    self.assertEqual(self.api.languoid('ashk1247').level, Level.dialect)
    self.assertEqual(self.api.languoid('abaz1241').level, Level.language)
    self.assertEqual(self.api.languoid('abaz1241').hid, 'abq')

    # Rename a family and rebuild; directory names (glottocodes) must stay
    # unique and the new name must be visible.
    self._set_lff(lfftext.replace('Abkhaz-Abaza', 'Abkhaz-Abazzza'), 'lff.txt')
    lff2tree(self.api)
    glottocodes = [d.name for d in walk(self.api.tree, mode='dirs')]
    self.assertEqual(len(glottocodes), len(set(glottocodes)))
    self.assertEqual(self.api.languoid('abkh1243').name, 'Abkhaz-Abazzza')

    # Restructure: abaz1241 becomes a node on a classification path,
    # ashk1247 moves to the language file, and nodes without a glottocode
    # ("[]") are introduced.
    lfftext = self._set_lff(
        """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235]
Abkhaz-Adyge [abkh1242]; Abkhaz-Abaza [abkh1243]; Abaza [abaz1241]
    Ashkaraua [ashk1247]xyz
    Abkhazian [abkh1244]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]; New Group []
    New name []NOCODE_New-name
    Another one []
""",
        'lff.txt')

    self._set_lff(
        """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244]
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
Kabardian [kaba1278]
    Dia []aaa
""",
        'dff.txt')

    lff2tree(self.api)
    self.assertEqual(self.api.languoid('abaz1241').level, Level.family)
    # Now we test two things:
    # - aaa has been removed as ISO code from abkh1242
    # - aaa has been attached as ISO code to a newly created language
    self.assertEqual(self.api.languoid('aaa').name, 'Dia')
    langs = list(self.api.languoids())
    self.assertIn('newg1234', self.api.glottocodes)
    self.assertEqual(len([l for l in langs if l.name == 'New Group']), 1)
    self.assertEqual(len([l for l in langs if l.hid == 'NOCODE_New-name']), 1)

    # Test ISO code removal:
    self._set_lff(
        """# -*- coding: utf-8 -*-
Kabardian [kaba1278]
    Dia []
""",
        'dff.txt')
    lff2tree(self.api)
    self.assertIsNone(self.api.languoid('aaa'))

    # Only checks that the reverse conversion runs on the current tree;
    # nothing is asserted about its output here.
    tree2lff(self.api)

    # Test hid adding
    self._set_lff(
        """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkarauax [bezs1238]NOCODE_abc
""",
        'dff.txt')
    lff2tree(self.api)
    self.assertEqual(self.api.languoid('bezs1238').hid, 'NOCODE_abc')

    #
    # Nodes must have unique names!
    #
    self._set_lff(
        """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkaraua [bezs1238]
""",
        'dff.txt')
    with self.assertRaisesRegexp(ValueError, 'duplicate'):
        lff2tree(self.api)

    #
    # Nodes must have consistent names!
    #
    self._set_lff(
        """# -*- coding: utf-8 -*-
Ashkxxxaraua [ashk1247]xyz
    Bezshagh [bezs1238]
""",
        'dff.txt')
    with self.assertRaisesRegexp(ValueError, 'inconsistent'):
        lff2tree(self.api)

    #
    # Top-level nodes in dff must be languages:
    #
    self._set_lff(
        """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Bezshagh [bezs1238]
""",
        'dff.txt')
    # Only the exception type is checked here, not its message.
    with self.assertRaises(ValueError):
        lff2tree(self.api)

    #
    # Top-level nodes in dff must be languages in lff:
    #
    self._set_lff(
        """# -*- coding: utf-8 -*-
None [xyzz1234]
    Dia []
""",
        'dff.txt')
    # Only the exception type is checked here, not its message.
    with self.assertRaises(ValueError):
        lff2tree(self.api)

    #
    # Isolates must not have multiple ancestors:
    #
    self._set_lff(
        """# -*- coding: utf-8 -*-
None [xyzz1234]; Other [-isolate-]
    Dia []
""",
        'dff.txt')
    with self.assertRaisesRegexp(ValueError, 'isolate'):
        lff2tree(self.api)

    #
    # Languages must appear after a classification line:
    #
    self._set_lff("""# -*- coding: utf-8 -*-
    Dia []
""", 'dff.txt')
    with self.assertRaisesRegexp(ValueError, 'classification'):
        lff2tree(self.api)