예제 #1
0
def lff2tree(args):
    """Recreate tree from lff.txt and dff.txt

    glottolog lff2tree [test]
    """
    # NOTE(review): the docstring is kept verbatim because it presumably
    # doubles as the command's CLI help text -- confirm before expanding it.

    # Shown when the rebuild aborts with a ValueError, before re-raising.
    rollback_hint = """
Something went wrong! Roll back inconsistent state running

    rm -rf languoids
    git checkout languoids
"""
    # Shown when the first positional argument is 'test'.
    test_hint = """
You can run

    diff -rbB build/tree/ languoids/tree/

to inspect the changes in the directory tree.
"""
    # Shown in every other case: how to review, discard or commit the changes.
    commit_hint = """
Run

    git status

to inspect changes in the directory tree.
You can run

    diff -rbB build/tree/ languoids/tree/

to inspect the changes in detail.

- To discard changes run

    git checkout languoids/tree

- To commit and push changes, run

    git add -A languoids/tree/...

  for any newly created nodes listed under

# Untracked files:
#   (use "git add <file>..." to include in what will be committed)
#
#	languoids/tree/...

  followed by

    git commit -a -m"reason for change of classification"
    git push origin
"""

    try:
        lff.lff2tree(args.repos, args.log)
    except ValueError:  # pragma: no cover
        print(rollback_hint)
        raise

    positional = args.args
    if positional and positional[0] == 'test':  # pragma: no cover
        print(test_hint)
    else:
        print(commit_hint)
예제 #2
0
def test_lff2tree_update_iso_code(api_copy):
    """Check that lff2tree re-attaches an ISO code that is missing from md.ini.

    Regression test for https://github.com/glottolog/pyglottolog/issues/47

    NOTE(review): api_copy is presumably a pytest fixture giving a writable
    copy of the repository API -- confirm against conftest.
    """
    md = api_copy.path('languoids', 'tree', 'abcd1234', 'md.ini')
    # We remove the iso code from the languoid by dropping every line of
    # md.ini that starts with 'iso'.
    text = '\n'.join(
        line for line in md.read_text(encoding='utf8').split('\n')
        if not line.startswith('iso'))
    md.write_text(text, encoding='utf8')
    # Now the following lff should reset hid **and** iso code:
    _set_lff(api_copy, 'lff.txt', """# -*- coding: utf-8 -*-
Name [abcd1234] aaa
    Other [abcd1235] abk
    """)
    _set_lff(api_copy, 'dff.txt', "")
    lff2tree(api_copy)
    assert api_copy.languoid('abcd1234').iso == 'aaa'
예제 #3
0
def lff2tree(args):
    """Recreate tree from lff.txt and dff.txt

    glottolog lff2tree [test]
    """
    # NOTE(review): the docstring is kept verbatim because it presumably
    # doubles as the command's CLI help text -- confirm before expanding it.
    lff.lff2tree()

    positional = args.args
    # Anything other than a leading 'test' argument gets the full
    # review/discard/commit instructions.
    if not positional or positional[0] != 'test':
        print("""
Run

    git status

to inspect changes in the directory tree.
You can run

    diff -rbB build/tree/ languoids/tree/

to inspect the changes in detail.

- To discard changes run

    git checkout languoids/tree

- To commit and push changes, run

    git add languoids/tree/...

  for any newly created nodes listed under

# Untracked files:
#   (use "git add <file>..." to include in what will be committed)
#
#	languoids/tree/...

  followed by

    git commit -a -m"reason for change of classification"
    git push origin
""")
        return

    # Test run: only point at the diff between the build and the tree.
    print("""
You can run

    diff -rbB build/tree/ languoids/tree/

to inspect the changes in the directory tree.
""")
예제 #4
0
파일: test_lff.py 프로젝트: clld/glottolog
    def test_lff2tree(self):
        old, new = self.tmp_path('old'), self.tmp_path('new')
        old.mkdir()
        new.mkdir()

        _l = """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235][uby]
Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241][abq]
    Abkhazian [abkh1244][abk]
Abkhaz-Adyge [abkh1242], Circassian [circ1239]
    Adyghe [adyg1241][ady]
    Kabardian [kaba1278][kbd]
"""

        _d = """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Ashkaraua [ashk1247][]
    Bezshagh [bezs1238][]
    Tapanta [tapa1256][]
Abkhazian [abkh1244]
    Abzhui [abzh1238][]
    Bzyb [bzyb1238][]
    Samurzakan [samu1242][]
"""

        def lffs():
            return {Level.language: lff(_l), Level.dialect: lff(_d)}

        lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
        tree2lff(
            new,
            out_paths={
                Level.language: self.tmp_path('lff'),
                Level.dialect: self.tmp_path('dff')
            }
        )
        with self.tmp_path('lff').open() as fp:
            self.assertEqual(fp.read(), _l)

        with self.tmp_path('dff').open() as fp:
            self.assertEqual(fp.read(), _d)

        lffs_ = {Level.language: lff(_l.replace('Abaza', 'Abazul')),
                 Level.dialect: lff(_d)}
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
        l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
        self.assertEqual(l.name, 'Abazul')
        self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
예제 #5
0
    def test_lff2tree(self):
        old, new = self.tmp_path('old'), self.tmp_path('new')
        old.mkdir()
        new.mkdir()

        _l = """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235][uby]
Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241][abq]
    Abkhazian [abkh1244][abk]
Abkhaz-Adyge [abkh1242], Circassian [circ1239]
    Adyghe [adyg1241][ady]
    Kabardian [kaba1278][kbd]
"""

        _d = """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Ashkaraua [ashk1247][]
    Bezshagh [bezs1238][]
    Tapanta [tapa1256][]
Abkhazian [abkh1244]
    Abzhui [abzh1238][]
    Bzyb [bzyb1238][]
    Samurzakan [samu1242][]
"""

        def lffs():
            return {Level.language: lff(_l), Level.dialect: lff(_d)}

        lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
        tree2lff(new,
                 out_paths={
                     Level.language: self.tmp_path('lff'),
                     Level.dialect: self.tmp_path('dff')
                 })
        with self.tmp_path('lff').open() as fp:
            self.assertEqual(fp.read(), _l)

        with self.tmp_path('dff').open() as fp:
            self.assertEqual(fp.read(), _d)

        lffs_ = {
            Level.language: lff(_l.replace('Abaza', 'Abazul')),
            Level.dialect: lff(_d)
        }
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
        l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
        self.assertEqual(l.name, 'Abazul')
        self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
예제 #6
0
def test_lff2tree(api_copy):
    """Exercise lff2tree through a sequence of lff.txt/dff.txt rewrites.

    Each phase overwrites one or both files via _set_lff, re-runs lff2tree and
    checks the resulting languoids.  The phases build on each other, so their
    order matters.  The final phases feed malformed dff.txt content and expect
    a ValueError whose message matches a keyword.

    NOTE(review): api_copy is presumably a pytest fixture providing a writable
    copy of the repository API -- confirm against conftest.
    """
    lfftext = _set_lff(
        api_copy, 'lff.txt', """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242] aaa
    Ubykh [ubyk1235]uby
Abkhaz-Adyge [abkh1242] aaa; Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241]abq
    Abkhazian [abkh1244]abk
Abkhaz-Adyge [abkh1242] aaa; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]kbd
""")

    _set_lff(
        api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
Abaza [abaz1241] abq
    Ashkaraua [ashk1247]
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244] abk
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
""")

    lff2tree(api_copy)
    assert api_copy.languoid('abkh1242').iso == 'aaa'
    assert api_copy.languoid(
        'ashk1247').level == api_copy.languoid_levels.dialect
    assert api_copy.languoid(
        'abaz1241').level == api_copy.languoid_levels.language
    assert api_copy.languoid('abaz1241').hid == 'abq'

    # Rename a family and rebuild: no glottocode directory may be duplicated.
    _set_lff(api_copy, 'lff.txt',
             lfftext.replace('Abkhaz-Abaza', 'Abkhaz-Abazzza'))
    lff2tree(api_copy)
    glottocodes = [d.name for d in walk(api_copy.tree, mode='dirs')]
    assert len(glottocodes) == len(set(glottocodes))

    abkh1243 = api_copy.languoid('abkh1243')
    # Make sure the new name is picked up ...
    assert abkh1243.name == 'Abkhaz-Abazzza'
    # ... and the old one retained as alternative name:
    assert 'Abkhaz-Abaza' in abkh1243.names['glottolog']

    # The return value is not needed from here on, so it is not stored.
    _set_lff(
        api_copy, 'lff.txt', """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235]
Abkhaz-Adyge [abkh1242]; Abkhaz-Abaza [abkh1243]; Abaza [abaz1241]
    Ashkaraua [ashk1247]xyz
    Abkhazian [abkh1244]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]; New Group []
    New name []NOCODE_New-name
    Another one []
""")

    _set_lff(
        api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244]
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
Kabardian [kaba1278]
    Dia []aaa
""")

    lff2tree(api_copy)
    assert api_copy.languoid(
        'abaz1241').level == api_copy.languoid_levels.family
    # Now we test two things:
    # - aaa has been removed as ISO code from abkh1242
    # - aaa has been attached as ISO code to a newly created language
    assert api_copy.languoid('aaa').name == 'Dia'
    langs = list(api_copy.languoids())
    assert 'newg1234' in api_copy.glottocodes
    assert sum(1 for lang in langs if lang.name == 'New Group') == 1
    assert sum(1 for lang in langs if lang.hid == 'NOCODE_New-name') == 1

    # Test ISO code removal:
    _set_lff(api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
Kabardian [kaba1278]
    Dia []
""")
    lff2tree(api_copy)
    assert api_copy.languoid('aaa') is None

    tree2lff(api_copy)

    # Test hid adding
    _set_lff(
        api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkarauax [bezs1238]NOCODE_abc
""")
    lff2tree(api_copy)
    assert api_copy.languoid('bezs1238').hid == 'NOCODE_abc'

    #
    # Nodes must have unique names!
    #
    _set_lff(
        api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkaraua [bezs1238]
""")
    with pytest.raises(ValueError, match=r'duplicate'):
        lff2tree(api_copy)

    #
    # Nodes must have consistent names!
    #
    _set_lff(
        api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
Ashkxxxaraua [ashk1247]xyz
    Bezshagh [bezs1238]
""")
    with pytest.raises(ValueError, match=r'inconsistent'):
        lff2tree(api_copy)

    #
    # Top-level nodes in dff must be languages:
    #
    _set_lff(
        api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Bezshagh [bezs1238]
""")
    with pytest.raises(ValueError, match=r'inconsistent'):
        lff2tree(api_copy)

    #
    # Top-level nodes in dff must be languages in lff:
    #
    _set_lff(api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
None [xyzz1234]
    Dia []
""")
    with pytest.raises(ValueError, match=r'invalid'):
        lff2tree(api_copy)

    #
    # Isolates must not have multiple ancestors:
    #
    _set_lff(
        api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
None [xyzz1234]; Other [-isolate-]
    Dia []
""")
    with pytest.raises(ValueError, match=r'isolate'):
        lff2tree(api_copy)

    #
    # Languages must appear after a classification line:
    #
    _set_lff(api_copy, 'dff.txt', """# -*- coding: utf-8 -*-
    Dia []
""")
    with pytest.raises(ValueError, match=r'classification'):
        lff2tree(api_copy)
예제 #7
0
    def test_lff2tree(self):
        lfftext = self._set_lff(
            """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242] aaa
    Ubykh [ubyk1235]uby
Abkhaz-Adyge [abkh1242] aaa; Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241]abq
    Abkhazian [abkh1244]abk
Abkhaz-Adyge [abkh1242] aaa; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]kbd
""", 'lff.txt')

        self._set_lff(
            """# -*- coding: utf-8 -*-
Abaza [abaz1241] abq
    Ashkaraua [ashk1247]
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244] abk
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
""", 'dff.txt')

        lff2tree(self.api)
        self.assertEqual(self.api.languoid('abkh1242').iso, 'aaa')
        self.assertEqual(self.api.languoid('ashk1247').level, Level.dialect)
        self.assertEqual(self.api.languoid('abaz1241').level, Level.language)
        self.assertEqual(self.api.languoid('abaz1241').hid, 'abq')

        self._set_lff(lfftext.replace('Abkhaz-Abaza', 'Abkhaz-Abazzza'),
                      'lff.txt')
        lff2tree(self.api)
        glottocodes = [d.name for d in walk(self.api.tree, mode='dirs')]
        self.assertEqual(len(glottocodes), len(set(glottocodes)))
        self.assertEqual(self.api.languoid('abkh1243').name, 'Abkhaz-Abazzza')

        lfftext = self._set_lff(
            """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235]
Abkhaz-Adyge [abkh1242]; Abkhaz-Abaza [abkh1243]; Abaza [abaz1241]
    Ashkaraua [ashk1247]xyz
    Abkhazian [abkh1244]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]; New Group []
    New name []NOCODE_New-name
    Another one []
""", 'lff.txt')

        self._set_lff(
            """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244]
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
Kabardian [kaba1278]
    Dia []aaa
""", 'dff.txt')

        lff2tree(self.api)
        self.assertEqual(self.api.languoid('abaz1241').level, Level.family)
        # Now we test two things:
        # - aaa has been removed as ISO code from abkh1242
        # - aaa has been attached as ISO code to a newly created language
        self.assertEqual(self.api.languoid('aaa').name, 'Dia')
        langs = list(self.api.languoids())
        self.assertIn('newg1234', self.api.glottocodes)
        self.assertEqual(len([l for l in langs if l.name == 'New Group']), 1)
        self.assertEqual(len([l for l in langs if l.hid == 'NOCODE_New-name']),
                         1)

        # Test ISO code removal:
        self._set_lff(
            """# -*- coding: utf-8 -*-
Kabardian [kaba1278]
    Dia []
""", 'dff.txt')
        lff2tree(self.api)
        self.assertIsNone(self.api.languoid('aaa'))

        tree2lff(self.api)

        # Test hid adding
        self._set_lff(
            """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkarauax [bezs1238]NOCODE_abc
""", 'dff.txt')
        lff2tree(self.api)
        self.assertEqual(self.api.languoid('bezs1238').hid, 'NOCODE_abc')

        #
        # Nodes must have unique names!
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkaraua [bezs1238]
""", 'dff.txt')
        with self.assertRaisesRegexp(ValueError, 'duplicate'):
            lff2tree(self.api)

        #
        # Nodes must have consistent names!
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
Ashkxxxaraua [ashk1247]xyz
    Bezshagh [bezs1238]
""", 'dff.txt')
        with self.assertRaisesRegexp(ValueError, 'inconsistent'):
            lff2tree(self.api)

        #
        # Top-level nodes in dff must be languages:
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Bezshagh [bezs1238]
""", 'dff.txt')
        with self.assertRaises(ValueError):
            lff2tree(self.api)

        #
        # Top-level nodes in dff must be languages in lff:
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
None [xyzz1234]
    Dia []
""", 'dff.txt')
        with self.assertRaises(ValueError):
            lff2tree(self.api)

        #
        # Isolates must not have multiple ancestors:
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
None [xyzz1234]; Other [-isolate-]
    Dia []
""", 'dff.txt')
        with self.assertRaisesRegexp(ValueError, 'isolate'):
            lff2tree(self.api)

        #
        # Languages must appear after a classification line:
        #
        self._set_lff("""# -*- coding: utf-8 -*-
    Dia []
""", 'dff.txt')
        with self.assertRaisesRegexp(ValueError, 'classification'):
            lff2tree(self.api)