Example #1
0
    def test_lff2tree(self):
        old, new = self.tmp_path('old'), self.tmp_path('new')
        old.mkdir()
        new.mkdir()

        _l = """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235][uby]
Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241][abq]
    Abkhazian [abkh1244][abk]
Abkhaz-Adyge [abkh1242], Circassian [circ1239]
    Adyghe [adyg1241][ady]
    Kabardian [kaba1278][kbd]
"""

        _d = """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Ashkaraua [ashk1247][]
    Bezshagh [bezs1238][]
    Tapanta [tapa1256][]
Abkhazian [abkh1244]
    Abzhui [abzh1238][]
    Bzyb [bzyb1238][]
    Samurzakan [samu1242][]
"""

        def lffs():
            return {Level.language: lff(_l), Level.dialect: lff(_d)}

        lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
        tree2lff(
            new,
            out_paths={
                Level.language: self.tmp_path('lff'),
                Level.dialect: self.tmp_path('dff')
            }
        )
        with self.tmp_path('lff').open() as fp:
            self.assertEqual(fp.read(), _l)

        with self.tmp_path('dff').open() as fp:
            self.assertEqual(fp.read(), _d)

        lffs_ = {Level.language: lff(_l.replace('Abaza', 'Abazul')),
                 Level.dialect: lff(_d)}
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
        l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
        self.assertEqual(l.name, 'Abazul')
        self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
Example #2
0
    def test_lff2tree(self):
        old, new = self.tmp_path('old'), self.tmp_path('new')
        old.mkdir()
        new.mkdir()

        _l = """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235][uby]
Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241][abq]
    Abkhazian [abkh1244][abk]
Abkhaz-Adyge [abkh1242], Circassian [circ1239]
    Adyghe [adyg1241][ady]
    Kabardian [kaba1278][kbd]
"""

        _d = """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Ashkaraua [ashk1247][]
    Bezshagh [bezs1238][]
    Tapanta [tapa1256][]
Abkhazian [abkh1244]
    Abzhui [abzh1238][]
    Bzyb [bzyb1238][]
    Samurzakan [samu1242][]
"""

        def lffs():
            return {Level.language: lff(_l), Level.dialect: lff(_d)}

        lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
        tree2lff(new,
                 out_paths={
                     Level.language: self.tmp_path('lff'),
                     Level.dialect: self.tmp_path('dff')
                 })
        with self.tmp_path('lff').open() as fp:
            self.assertEqual(fp.read(), _l)

        with self.tmp_path('dff').open() as fp:
            self.assertEqual(fp.read(), _d)

        lffs_ = {
            Level.language: lff(_l.replace('Abaza', 'Abazul')),
            Level.dialect: lff(_d)
        }
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
        l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
        self.assertEqual(l.name, 'Abazul')
        self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
Example #3
0
def test_lff2tree(api_copy):
    """Drive ``lff2tree`` through a sequence of lff.txt/dff.txt rewrites.

    Every step replaces one of the two files via ``_set_lff`` and re-runs
    ``lff2tree`` on ``api_copy``; later steps rely on the state left behind by
    earlier ones, so the order of the steps matters.
    """
    def write(fname, text):
        # All files written by this test go to ``api_copy``.
        return _set_lff(api_copy, fname, text)

    def assert_rebuild_fails(pattern):
        # Re-running lff2tree must raise a ValueError matching ``pattern``.
        with pytest.raises(ValueError, match=pattern):
            lff2tree(api_copy)

    initial_lff = write('lff.txt', """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242] aaa
    Ubykh [ubyk1235]uby
Abkhaz-Adyge [abkh1242] aaa; Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241]abq
    Abkhazian [abkh1244]abk
Abkhaz-Adyge [abkh1242] aaa; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]kbd
""")

    write('dff.txt', """# -*- coding: utf-8 -*-
Abaza [abaz1241] abq
    Ashkaraua [ashk1247]
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244] abk
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
""")

    lff2tree(api_copy)
    top_family = api_copy.languoid('abkh1242')
    assert top_family.iso == 'aaa'
    ashkaraua = api_copy.languoid('ashk1247')
    assert ashkaraua.level == api_copy.languoid_levels.dialect
    abaza = api_copy.languoid('abaz1241')
    assert abaza.level == api_copy.languoid_levels.language
    abaza = api_copy.languoid('abaz1241')
    assert abaza.hid == 'abq'

    # Rename a subgroup and rebuild: no directory name may occur twice.
    write('lff.txt', initial_lff.replace('Abkhaz-Abaza', 'Abkhaz-Abazzza'))
    lff2tree(api_copy)
    dir_names = [entry.name for entry in walk(api_copy.tree, mode='dirs')]
    assert len(dir_names) == len(set(dir_names))

    renamed_group = api_copy.languoid('abkh1243')
    # The languoid carries the new name ...
    assert renamed_group.name == 'Abkhaz-Abazzza'
    # ... while the previous one is kept among its alternative names.
    assert 'Abkhaz-Abaza' in renamed_group.names['glottolog']

    write('lff.txt', """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235]
Abkhaz-Adyge [abkh1242]; Abkhaz-Abaza [abkh1243]; Abaza [abaz1241]
    Ashkaraua [ashk1247]xyz
    Abkhazian [abkh1244]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]; New Group []
    New name []NOCODE_New-name
    Another one []
""")

    write('dff.txt', """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244]
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
Kabardian [kaba1278]
    Dia []aaa
""")

    lff2tree(api_copy)
    abaza = api_copy.languoid('abaz1241')
    assert abaza.level == api_copy.languoid_levels.family
    # Two things are checked by the lookup below:
    # - abkh1242 no longer owns the ISO code aaa
    # - aaa now belongs to a newly created language
    assert api_copy.languoid('aaa').name == 'Dia'
    all_languoids = list(api_copy.languoids())
    assert 'newg1234' in api_copy.glottocodes
    new_groups = [lang for lang in all_languoids if lang.name == 'New Group']
    assert len(new_groups) == 1
    new_names = [
        lang for lang in all_languoids if lang.hid == 'NOCODE_New-name']
    assert len(new_names) == 1

    # Dropping the ISO code from the listing removes it from the tree.
    write('dff.txt', """# -*- coding: utf-8 -*-
Kabardian [kaba1278]
    Dia []
""")
    lff2tree(api_copy)
    assert api_copy.languoid('aaa') is None

    tree2lff(api_copy)

    # A hid given in the listing is attached to the languoid.
    write('dff.txt', """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkarauax [bezs1238]NOCODE_abc
""")
    lff2tree(api_copy)
    assert api_copy.languoid('bezs1238').hid == 'NOCODE_abc'

    # Node names must be unique.
    write('dff.txt', """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkaraua [bezs1238]
""")
    assert_rebuild_fails('duplicate')

    # Node names must be consistent.
    write('dff.txt', """# -*- coding: utf-8 -*-
Ashkxxxaraua [ashk1247]xyz
    Bezshagh [bezs1238]
""")
    assert_rebuild_fails('inconsistent')

    # Top-level nodes in dff must be languages.
    write('dff.txt', """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Bezshagh [bezs1238]
""")
    assert_rebuild_fails('inconsistent')

    # Top-level nodes in dff must be languages listed in lff.
    write('dff.txt', """# -*- coding: utf-8 -*-
None [xyzz1234]
    Dia []
""")
    assert_rebuild_fails('invalid')

    # Isolates must not have multiple ancestors.
    write('dff.txt', """# -*- coding: utf-8 -*-
None [xyzz1234]; Other [-isolate-]
    Dia []
""")
    assert_rebuild_fails('isolate')

    # A language line must be preceded by a classification line.
    write('dff.txt', """# -*- coding: utf-8 -*-
    Dia []
""")
    assert_rebuild_fails('classification')
Example #4
0
    def test_lff2tree(self):
        lfftext = self._set_lff(
            """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242] aaa
    Ubykh [ubyk1235]uby
Abkhaz-Adyge [abkh1242] aaa; Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241]abq
    Abkhazian [abkh1244]abk
Abkhaz-Adyge [abkh1242] aaa; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]kbd
""", 'lff.txt')

        self._set_lff(
            """# -*- coding: utf-8 -*-
Abaza [abaz1241] abq
    Ashkaraua [ashk1247]
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244] abk
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
""", 'dff.txt')

        lff2tree(self.api)
        self.assertEqual(self.api.languoid('abkh1242').iso, 'aaa')
        self.assertEqual(self.api.languoid('ashk1247').level, Level.dialect)
        self.assertEqual(self.api.languoid('abaz1241').level, Level.language)
        self.assertEqual(self.api.languoid('abaz1241').hid, 'abq')

        self._set_lff(lfftext.replace('Abkhaz-Abaza', 'Abkhaz-Abazzza'),
                      'lff.txt')
        lff2tree(self.api)
        glottocodes = [d.name for d in walk(self.api.tree, mode='dirs')]
        self.assertEqual(len(glottocodes), len(set(glottocodes)))
        self.assertEqual(self.api.languoid('abkh1243').name, 'Abkhaz-Abazzza')

        lfftext = self._set_lff(
            """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235]
Abkhaz-Adyge [abkh1242]; Abkhaz-Abaza [abkh1243]; Abaza [abaz1241]
    Ashkaraua [ashk1247]xyz
    Abkhazian [abkh1244]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]
    Adyghe [adyg1241]ady
    Kabardian [kaba1278]
Abkhaz-Adyge [abkh1242]; Circassian [circ1239]; New Group []
    New name []NOCODE_New-name
    Another one []
""", 'lff.txt')

        self._set_lff(
            """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Bezshagh [bezs1238]
    Tapanta [tapa1256]
Abkhazian [abkh1244]
    Abzhui [abzh1238]
    Bzyb [bzyb1238]
    Samurzakan [samu1242]
Kabardian [kaba1278]
    Dia []aaa
""", 'dff.txt')

        lff2tree(self.api)
        self.assertEqual(self.api.languoid('abaz1241').level, Level.family)
        # Now we test two things:
        # - aaa has been removed as ISO code from abkh1242
        # - aaa has been attached as ISO code to a newly created language
        self.assertEqual(self.api.languoid('aaa').name, 'Dia')
        langs = list(self.api.languoids())
        self.assertIn('newg1234', self.api.glottocodes)
        self.assertEqual(len([l for l in langs if l.name == 'New Group']), 1)
        self.assertEqual(len([l for l in langs if l.hid == 'NOCODE_New-name']),
                         1)

        # Test ISO code removal:
        self._set_lff(
            """# -*- coding: utf-8 -*-
Kabardian [kaba1278]
    Dia []
""", 'dff.txt')
        lff2tree(self.api)
        self.assertIsNone(self.api.languoid('aaa'))

        tree2lff(self.api)

        # Test hid adding
        self._set_lff(
            """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkarauax [bezs1238]NOCODE_abc
""", 'dff.txt')
        lff2tree(self.api)
        self.assertEqual(self.api.languoid('bezs1238').hid, 'NOCODE_abc')

        #
        # Nodes must have unique names!
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
Ashkaraua [ashk1247]xyz
    Ashkaraua [bezs1238]
""", 'dff.txt')
        with self.assertRaisesRegexp(ValueError, 'duplicate'):
            lff2tree(self.api)

        #
        # Nodes must have consistent names!
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
Ashkxxxaraua [ashk1247]xyz
    Bezshagh [bezs1238]
""", 'dff.txt')
        with self.assertRaisesRegexp(ValueError, 'inconsistent'):
            lff2tree(self.api)

        #
        # Top-level nodes in dff must be languages:
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Bezshagh [bezs1238]
""", 'dff.txt')
        with self.assertRaises(ValueError):
            lff2tree(self.api)

        #
        # Top-level nodes in dff must be languages in lff:
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
None [xyzz1234]
    Dia []
""", 'dff.txt')
        with self.assertRaises(ValueError):
            lff2tree(self.api)

        #
        # Isolates must not have multiple ancestors:
        #
        self._set_lff(
            """# -*- coding: utf-8 -*-
None [xyzz1234]; Other [-isolate-]
    Dia []
""", 'dff.txt')
        with self.assertRaisesRegexp(ValueError, 'isolate'):
            lff2tree(self.api)

        #
        # Languages must appear after a classification line:
        #
        self._set_lff("""# -*- coding: utf-8 -*-
    Dia []
""", 'dff.txt')
        with self.assertRaisesRegexp(ValueError, 'classification'):
            lff2tree(self.api)
Example #5
0
def tree2lff(args):
    """Create lff.txt and dff.txt from the current languoid tree.

    glottolog tree2lff
    """
    # Docstring kept verbatim: it may be surfaced as CLI help text
    # (NOTE(review): confirm against the command registry).
    repos, log = args.repos, args.log
    # Delegate to the library routine with the repository and logger
    # carried on the parsed arguments.
    lff.tree2lff(repos, log)
Example #6
0
def run(args):
    """Call ``lff.tree2lff`` with ``args.repos`` and ``args.log``."""
    repos = args.repos
    log = args.log
    lff.tree2lff(repos, log)
Example #7
0
def tree2lff(args):
    """Create lff.txt and dff.txt from the current languoid tree.

    glottolog tree2lff
    """
    # Docstring kept verbatim: it may be surfaced as CLI help text
    # (NOTE(review): confirm against the command registry).
    # Resolve the 'tree' path for the selected repository first, then hand
    # it to the library routine as its ``tree`` keyword argument.
    tree_dir = languoids_path('tree', repos=args.repos)
    lff.tree2lff(tree=tree_dir)
Example #8
0
def tree2lff(args):
    """Create lff.txt and dff.txt from the current languoid tree.

    glottolog tree2lff
    """
    # ``args`` is accepted but not read here; lff.tree2lff is called with no
    # arguments (presumably falling back to its own defaults -- TODO confirm).
    lff.tree2lff()