Ejemplo n.º 1
0
    def test_factory(self):
        """Load a family and one of its languages from the test tree and
        check reading and writing of the language's attributes."""
        from pyglottolog.languoids import Languoid, Level

        family = Languoid.from_dir(self.tree.joinpath('abcd1234'))
        self.assertEqual(family.category, 'Family')

        lang = Languoid.from_dir(self.tree.joinpath(family.id, 'abcd1235'))
        self.assertEqual(lang.name, 'language')
        self.assertEqual(lang.level, Level.language)

        # Coordinates: check the loaded values, overwrite them, read back.
        self.assertAlmostEqual(lang.latitude, 0.5)
        self.assertAlmostEqual(lang.longitude, 0.5)
        lang.latitude = 1.0
        lang.longitude = 1.0
        self.assertAlmostEqual(lang.latitude, 1.0)
        self.assertAlmostEqual(lang.longitude, 1.0)

        # The value assigned through ``iso_code`` is read back via ``iso``.
        self.assertEqual(lang.iso_code, 'abc')
        lang.iso_code = 'cde'
        self.assertEqual(lang.iso, 'cde')

        self.assertEqual(lang.hid, 'abc')
        lang.hid = 'abo'
        self.assertEqual(lang.hid, 'abo')
        self.assertEqual(lang.id, 'abcd1235')

        self.assertEqual(lang.macroareas, ['a', 'b'])
        lang.macroareas = ['a']
        self.assertEqual(lang.macroareas, ['a'])

        # Navigation between the family and the language.
        self.assertEqual(lang.parent, family)
        first_child = family.children[0]
        self.assertEqual(first_child, lang)
        self.assertEqual(lang.children[0].family, family)

        target = self.tmp_path('new').as_posix()
        lang.write_info(target)
Ejemplo n.º 2
0
    def test_lang2tree(self):
        """Run ``lang2tree`` into an empty directory, then run it again into
        a second directory with the first tree passed as the old tree."""
        old_root = self.tmp_path('old')
        new_root = self.tmp_path('new')
        old_root.mkdir()
        new_root.mkdir()

        # First run: nothing is known from an earlier tree.
        lang = Languoid.from_name_id_level('name', 'abcd1234', Level.language)
        lineage = [('parent', 'abcd1233', Level.family)]
        lang2tree(lang, lineage, old_root, {})
        ini = old_root.joinpath('abcd1233', 'abcd1234', 'abcd1234.ini')
        assert ini.exists()

        # Second run: the languoids found in the first tree are passed along,
        # keyed by their id.
        lang = Languoid.from_name_id_level('name', 'abcd1234', Level.language)
        lineage = [('parent', 'abcd1233', Level.family)]
        known = {}
        for languoid in walk_tree(old_root):
            known[languoid.id] = languoid
        lang2tree(lang, lineage, new_root, known)
        ini = new_root.joinpath('abcd1233', 'abcd1234', 'abcd1234.ini')
        assert ini.exists()
Ejemplo n.º 3
0
def read_lff(level, fp=None, dry_run=False):
    """Yield one languoid for each language line of an lff-style file.

    Three kinds of content lines are recognised (comment lines starting with
    ``#`` and blank lines are skipped):

    - language lines: leading whitespace, a name-and-id item (as described by
      ``NAME_AND_ID_REGEX``, defined elsewhere) and a bracketed code that is
      empty, three lowercase letters or ``NOCODE_...``;
    - isolate lines, ending in ``[-isolate-]``;
    - classification lines: one or more comma-separated name-and-id items.

    Each language line is handed to ``Languoid.from_lff`` together with the
    most recently seen classification line (``None`` after an isolate line).

    :param level: ``Level`` member; the first letter of its name selects the \
    default file (``<letter>ff.txt``) when no ``fp`` is given.
    :param fp: optional open file object to read from. It is used as context \
    manager around the iteration.
    :param dry_run: passed through to ``Languoid.from_lff``.
    :raises ValueError: for a content line matching none of the three patterns.
    """
    assert isinstance(level, Level)
    # Raw strings: "\s", "\[" and "\_" are invalid escape sequences in plain
    # string literals and trigger warnings on recent Python versions.
    lang_line = re.compile(
        r"\s+" + NAME_AND_ID_REGEX + r"(\[([a-z]{3}|NOCODE\_[^\]]+)?\])$")
    class_line = re.compile(
        NAME_AND_ID_REGEX + r"(,\s*" + NAME_AND_ID_REGEX + r")*$")
    isolate_line = re.compile(r"([^\[]+)(\[-isolate-\])$")

    path = None
    source = fp or build_path("%sff.txt" % level.name[0]).open(encoding="utf8")
    with source as lines:
        for line in lines:
            line = line.rstrip()
            if line.startswith("#") or not line.strip():
                # ignore comments or empty lines
                continue
            if lang_line.match(line):
                # A language line must be preceded by a classification or
                # isolate line.
                assert path
                yield Languoid.from_lff(
                    None if path == "isolate" else path,
                    line.strip(),
                    level,
                    dry_run=dry_run)
            elif isolate_line.match(line):
                path = "isolate"
            elif class_line.match(line):
                path = line.strip()
            else:
                # Neither language, isolate nor classification line.
                raise ValueError(line)
Ejemplo n.º 4
0
def read_lff(level, fp=None):
    """Yield one languoid for each language line of an lff-style file.

    Comment lines (starting with ``#``) and blank lines are skipped. Every
    other line must be a language line (indented, with a bracketed code that
    is empty or three lowercase letters), an isolate line (ending in
    ``[-isolate-]``) or a classification line (comma-separated name-and-id
    items as described by ``NAME_AND_ID_REGEX``, which is defined elsewhere).

    Each language line is handed to ``Languoid.from_lff`` together with the
    most recently seen classification line (``None`` after an isolate line).

    :param level: indexable value whose first item selects the default file \
    (``<item>ff.txt``) when no ``fp`` is given; also passed to \
    ``Languoid.from_lff``.
    :param fp: optional open file object to read from. It is used as context \
    manager around the iteration.
    :raises ValueError: for a content line matching none of the three patterns.
    """
    # Raw strings: "\s" and "\[" are invalid escape sequences in plain string
    # literals and trigger warnings on recent Python versions.
    lang_line = re.compile(r'\s+' + NAME_AND_ID_REGEX + r'(\[([a-z]{3})?\])$')
    class_line = re.compile(
        NAME_AND_ID_REGEX + r'(,\s*' + NAME_AND_ID_REGEX + r')*$')
    isolate_line = re.compile(r'([^\[]+)(\[-isolate-\])$')

    path = None
    source = fp or build_path('%sff.txt' % level[0]).open(encoding='utf8')
    with source as lines:
        for line in lines:
            line = line.rstrip()
            if line.startswith('#') or not line.strip():
                # ignore comments or empty lines
                continue

            if lang_line.match(line):
                # A language line must be preceded by a classification or
                # isolate line.
                assert path
                classification = None if path == 'isolate' else path
                yield Languoid.from_lff(classification, line.strip(), level)
                continue

            if isolate_line.match(line):
                path = 'isolate'
                continue

            # Anything left has to be a classification line.
            if not class_line.match(line):
                raise ValueError(line)
            path = line.strip()
Ejemplo n.º 5
0
def read_lff(level, fp=None, dry_run=False):
    """Parse an lff-style file and yield a languoid per language line.

    Lines are processed in order. Comment lines (``#``) and blank lines are
    ignored. A classification line (comma-separated name-and-id items as
    described by ``NAME_AND_ID_REGEX``, defined elsewhere) or an isolate line
    (ending in ``[-isolate-]``) sets the current path; each following language
    line (indented, with a bracketed code that is empty, three lowercase
    letters or ``NOCODE_...``) is passed to ``Languoid.from_lff`` with that
    path, or with ``None`` for isolates.

    :param level: ``Level`` member; the first letter of its name selects the \
    default file (``<letter>ff.txt``) when no ``fp`` is given.
    :param fp: optional open file object to read from. It is used as context \
    manager around the iteration.
    :param dry_run: passed through to ``Languoid.from_lff``.
    :raises ValueError: for a content line matching none of the three patterns.
    """
    assert isinstance(level, Level)
    # Raw strings: "\s", "\[" and "\_" are invalid escape sequences in plain
    # string literals and trigger warnings on recent Python versions.
    lang_line = re.compile(r'\s+' + NAME_AND_ID_REGEX +
                           r'(\[([a-z]{3}|NOCODE\_[^\]]+)?\])$')
    class_line = re.compile(NAME_AND_ID_REGEX + r'(,\s*' + NAME_AND_ID_REGEX +
                            r')*$')
    isolate_line = re.compile(r'([^\[]+)(\[-isolate-\])$')

    path = None
    with fp or build_path(
            '%sff.txt' % level.name[0]).open(encoding='utf8') as lines:
        for line in lines:
            line = line.rstrip()
            stripped = line.strip()
            if line.startswith('#') or not stripped:
                # ignore comments or empty lines
                continue

            if lang_line.match(line):
                # A language line is only valid below a classification or
                # isolate line.
                assert path
                yield Languoid.from_lff(None if path == 'isolate' else path,
                                        stripped,
                                        level,
                                        dry_run=dry_run)
            elif isolate_line.match(line):
                path = 'isolate'
            elif class_line.match(line):
                path = stripped
            else:
                raise ValueError(line)
Ejemplo n.º 6
0
def lang2tree(lang, lineage, out, old_tree):
    """Create the directories for ``lang`` and its lineage below ``out``.

    For every ``(name, id, level)`` item of ``lineage`` a nested directory
    named after the id is created if missing, and an info file is written for
    the group. The group object is taken from ``old_tree`` when its id is
    known there (and renamed if the name differs), otherwise it is created
    with ``Languoid.from_name_id_level``. Finally a directory for ``lang`` is
    created and its info written, reusing the object from ``old_tree`` (with
    name and iso updated) when available.

    :param lang: languoid to place in the tree.
    :param lineage: iterable of ``(name, id, level)`` triples, outermost first.
    :param out: directory (path object) below which the tree is built.
    :param old_tree: mapping of id to previously known languoid objects.
    """
    current = out

    for name, id_, level in lineage:
        current = current.joinpath(id_)
        if current.exists():
            # Group directory already present: nothing is written for it.
            continue

        current.mkdir()
        if id_ not in old_tree:
            node = Languoid.from_name_id_level(name, id_, level)
        else:
            node = old_tree[id_]
            assert node.level == level
            if name != node.name:
                # rename a subgroup!
                node.name = name
        node.write_info(current)

    langdir = current.joinpath(lang.fname())
    langdir.mkdir()

    if lang.id not in old_tree:
        lang.write_info(langdir)
        return

    # Carry over the known languoid, updated from the new data.
    known = old_tree[lang.id]
    assert known.level == lang.level
    if known.name != lang.name:
        known.name = lang.name
    if known.iso != lang.iso:
        known.iso = lang.iso
    known.write_info(langdir)
Ejemplo n.º 7
0
    def test_isolate(self):
        """A languoid loaded from ``isol1234`` is an isolate without parent
        or family."""
        from pyglottolog.languoids import Languoid

        isolate_dir = self.tree.joinpath('isol1234')
        isolate = Languoid.from_dir(isolate_dir)
        self.assertTrue(isolate.isolate)
        self.assertIsNone(isolate.parent)
        self.assertIsNone(isolate.family)
Ejemplo n.º 8
0
def lang2tree(lang, lineage, out, old_tree):
    """Write ``lang`` and the groups of its ``lineage`` as a directory tree.

    Walks ``lineage`` (triples of name, id and level) from the outermost group
    inwards, creating one nested directory per id below ``out``. Only for
    directories created here is an info file written, using the object found
    in ``old_tree`` (renamed when necessary) or a new one built with
    ``Languoid.from_name_id_level``. The directory for ``lang`` itself is
    always created; its info is written from the matching ``old_tree`` entry
    (after syncing name and iso) if there is one, else from ``lang``.

    :param lang: languoid to place in the tree.
    :param lineage: iterable of ``(name, id, level)`` triples.
    :param out: directory (path object) below which the tree is built.
    :param old_tree: mapping of id to previously known languoid objects.
    """
    parent_dir = out

    for group_name, group_id, group_level in lineage:
        parent_dir = parent_dir.joinpath(group_id)
        if not parent_dir.exists():
            parent_dir.mkdir()
            if group_id in old_tree:
                group = old_tree[group_id]
                assert group.level == group_level
                if group_name != group.name:
                    # rename a subgroup!
                    group.name = group_name
            else:
                group = Languoid.from_name_id_level(
                    group_name, group_id, group_level)
            group.write_info(parent_dir)

    langdir = parent_dir.joinpath(lang.fname())
    langdir.mkdir()

    # Decide which object gets written: the known one, updated in place, or
    # the new languoid as passed in.
    if lang.id in old_tree:
        to_write = old_tree[lang.id]
        assert to_write.level == lang.level
        if to_write.name != lang.name:
            to_write.name = lang.name
        if to_write.iso != lang.iso:
            to_write.iso = lang.iso
    else:
        to_write = lang
    to_write.write_info(langdir)
Ejemplo n.º 9
0
    def test_lang2tree(self):
        """Build a tree with ``lang2tree`` twice: once without and once with
        an old tree, and check that the language's ini file exists."""
        old_root = self.tmp_path('old')
        new_root = self.tmp_path('new')
        old_root.mkdir()
        new_root.mkdir()

        # Without any previously known languoids.
        lang = Languoid.from_name_id_level('name', 'abcd1234', Level.language)
        lineage = [('parent', 'abcd1233', Level.family)]
        lang2tree(lang, lineage, old_root, {})
        assert old_root.joinpath(
            'abcd1233', 'abcd1234', 'abcd1234.ini').exists()

        # With the languoids of the tree built above, keyed by id.
        lang = Languoid.from_name_id_level('name', 'abcd1234', Level.language)
        lineage = [('parent', 'abcd1233', Level.family)]
        known = {node.id: node for node in walk_tree(old_root)}
        lang2tree(lang, lineage, new_root, known)
        assert new_root.joinpath(
            'abcd1233', 'abcd1234', 'abcd1234.ini').exists()
Ejemplo n.º 10
0
    def test_factory(self):
        """Load a family and a child language via ``Languoid.from_dir`` and
        check reading, updating and writing of the language's data."""
        family = Languoid.from_dir(self.api.tree.joinpath('abcd1234'))
        self.assertEqual(family.category, 'Family')

        lang = Languoid.from_dir(
            self.api.tree.joinpath(family.id, 'abcd1235'))
        self.assertEqual(lang.name, 'language')
        # The id shows up in repr(), the name in the string form.
        self.assertIn('abcd1235', repr(lang))
        self.assertIn('language', '%s' % lang)
        self.assertEqual(lang.level, Level.language)

        # Coordinates: check the loaded values, overwrite them, read back.
        self.assertAlmostEqual(lang.latitude, 0.5)
        self.assertAlmostEqual(lang.longitude, 0.5)
        lang.latitude = 1.0
        lang.longitude = 1.0
        self.assertAlmostEqual(lang.latitude, 1.0)
        self.assertAlmostEqual(lang.longitude, 1.0)

        # The value assigned through ``iso_code`` is read back via ``iso``.
        self.assertEqual(lang.iso_code, 'abc')
        lang.iso_code = 'cde'
        self.assertEqual(lang.iso, 'cde')

        self.assertEqual(lang.hid, 'abc')
        lang.hid = 'abo'
        self.assertEqual(lang.hid, 'abo')
        self.assertEqual(lang.id, 'abcd1235')

        self.assertEqual(len(lang.macroareas), 2)
        lang.macroareas = [Macroarea.africa]
        self.assertEqual(lang.macroareas, [Macroarea.africa])

        lang.countries = self.api.countries[:2]
        self.assertEqual(len(lang.countries), 2)

        # Navigation between the family and the language.
        self.assertEqual(lang.parent, family)
        self.assertEqual(family.children[0], lang)
        self.assertEqual(lang.children[0].family, family)

        # Writing the info creates a directory named after the id.
        lang.write_info(self.tmp_path().as_posix())
        self.assertTrue(self.tmp_path('abcd1235').exists())

        retirement = self.api.languoid('abcd1235').iso_retirement
        self.assertIsInstance(retirement.asdict(), dict)
        self.assertIsNone(lang.classification_comment)

        # A status given as string is read back as enum member.
        lang.endangerment = 'Critically endangered'
        self.assertEqual(lang.endangerment, EndangermentStatus.critical)

        # ``names`` and ``identifier`` reflect the corresponding cfg sections.
        self.assertEqual(lang.names, {})
        lang.cfg['altnames'] = {'glottolog': 'xyz'}
        self.assertIn('glottolog', lang.names)
        self.assertEqual(lang.identifier, {})
        lang.cfg['identifier'] = {'multitree': 'xyz'}
        self.assertIn('multitree', lang.identifier)
Ejemplo n.º 11
0
def test_factory(tmpdir, api_copy):
    """Load a family and a child language via ``Languoid.from_dir`` and
    check reading, updating and writing of the language's data."""
    family = Languoid.from_dir(api_copy.tree / 'abcd1234')
    assert family.category == 'Family'

    lang = Languoid.from_dir(api_copy.tree / family.id / 'abcd1235')
    assert lang.name == 'language'
    # The id shows up in repr(), the name in the string form.
    assert 'abcd1235' in repr(lang)
    assert 'language' in '%s' % lang
    assert lang.level == Level.language

    # Coordinates: check the loaded values, overwrite them, read back.
    assert lang.latitude == pytest.approx(0.5)
    assert lang.longitude == pytest.approx(0.5)
    lang.latitude = 1.0
    lang.longitude = 1.0
    assert lang.latitude == pytest.approx(1.0)
    assert lang.longitude == pytest.approx(1.0)

    # The value assigned through ``iso_code`` is read back via ``iso``.
    assert lang.iso_code == 'abc'
    lang.iso_code = 'cde'
    assert lang.iso == 'cde'

    assert lang.hid == 'abc'
    lang.hid = 'abo'
    assert lang.hid == 'abo'
    assert lang.id == 'abcd1235'

    assert len(lang.macroareas) == 2
    lang.macroareas = [Macroarea.africa]
    assert lang.macroareas == [Macroarea.africa]

    lang.countries = api_copy.countries[:2]
    assert len(lang.countries) == 2

    # Navigation between the family and the language.
    assert lang.parent == family
    assert family.children[0] == lang
    assert lang.children[0].family == family

    # Writing the info creates a directory named after the id.
    lang.write_info(str(tmpdir))
    assert (tmpdir / 'abcd1235').exists()

    retirement = api_copy.languoid('abcd1235').iso_retirement
    assert isinstance(retirement.asdict(), dict)
    assert lang.classification_comment is None

    # A status given as string is read back as enum member.
    lang.endangerment = 'nearly extinct'
    assert lang.endangerment == EndangermentStatus.critical

    # ``names`` and ``identifier`` reflect the corresponding cfg sections.
    assert lang.names == {}
    lang.cfg['altnames'] = {'glottolog': 'xyz'}
    assert 'glottolog' in lang.names
    assert lang.identifier == {}
    lang.cfg['identifier'] = {'multitree': 'xyz'}
    assert 'multitree' in lang.identifier
Ejemplo n.º 12
0
def test_attrs(api):
    """The name of a new languoid can be changed; assigning to ``glottocode``
    or ``id`` raises ``AttributeError``."""
    lang = Languoid.from_name_id_level(
        api.tree, 'name', 'abcd1235', 'language', hid='NOCODE')

    lang.name = 'other'
    assert lang.name == 'other'

    # Both spellings of the identifier reject assignment.
    for attr in ('glottocode', 'id'):
        with pytest.raises(AttributeError):
            setattr(lang, attr, 'x')

    assert lang.id == lang.glottocode
    assert lang.hid == 'NOCODE'
Ejemplo n.º 13
0
    def test_attrs(self):
        """The name of a new languoid can be changed; assigning an invalid
        value to ``glottocode`` or ``id`` raises ``ValueError``."""
        from pyglottolog.languoids import Languoid, Level

        lang = Languoid.from_name_id_level(
            'name', 'abcd1235', Level.language, hid='NOCODE')

        lang.name = 'other'
        self.assertEqual(lang.name, 'other')

        # Both spellings of the identifier reject the value 'x'.
        for attr in ('glottocode', 'id'):
            with self.assertRaises(ValueError):
                setattr(lang, attr, 'x')

        self.assertEqual(lang.id, lang.glottocode)
        self.assertEqual(lang.hid, 'NOCODE')
Ejemplo n.º 14
0
def run(args):
    """Create a new languoid below a parent and write its info file.

    ``args.parent`` is either a glottocode, in which case it is replaced by
    the directory of the corresponding languoid, or a directory path, which
    must exist. ``args.props`` holds ``key=value`` strings that are passed as
    keyword arguments to ``Languoid.from_name_id_level``.

    :raises ParserError: if ``args.parent`` is a path that does not exist.
    :raises ValueError: if an item of ``args.props`` contains no ``=``.
    """
    if Glottocode.pattern.match(args.parent):
        args.parent = get_languoid(args, args.parent).dir
    else:
        args.parent = pathlib.Path(args.parent)
        if not args.parent.exists():
            raise ParserError('invalid parent dir specified')

    # Split on the first '=' only, so property values may contain '='.
    # Parsed before a new glottocode is requested, so malformed properties
    # are reported first.
    props = dict(prop.split('=', 1) for prop in args.props)

    lang = Languoid.from_name_id_level(
        args.parent, args.name, args.repos.glottocodes.new(args.name),
        args.level, **props)

    print("Info written to %s" % lang.write_info(outdir=args.parent))
Ejemplo n.º 15
0
    def test_lff2tree(self):
        old, new = self.tmp_path('old'), self.tmp_path('new')
        old.mkdir()
        new.mkdir()

        _l = """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235][uby]
Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241][abq]
    Abkhazian [abkh1244][abk]
Abkhaz-Adyge [abkh1242], Circassian [circ1239]
    Adyghe [adyg1241][ady]
    Kabardian [kaba1278][kbd]
"""

        _d = """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Ashkaraua [ashk1247][]
    Bezshagh [bezs1238][]
    Tapanta [tapa1256][]
Abkhazian [abkh1244]
    Abzhui [abzh1238][]
    Bzyb [bzyb1238][]
    Samurzakan [samu1242][]
"""

        def lffs():
            return {Level.language: lff(_l), Level.dialect: lff(_d)}

        lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
        tree2lff(
            new,
            out_paths={
                Level.language: self.tmp_path('lff'),
                Level.dialect: self.tmp_path('dff')
            }
        )
        with self.tmp_path('lff').open() as fp:
            self.assertEqual(fp.read(), _l)

        with self.tmp_path('dff').open() as fp:
            self.assertEqual(fp.read(), _d)

        lffs_ = {Level.language: lff(_l.replace('Abaza', 'Abazul')),
                 Level.dialect: lff(_d)}
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
        l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
        self.assertEqual(l.name, 'Abazul')
        self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
Ejemplo n.º 16
0
    def test_lff2tree(self):
        old, new = self.tmp_path('old'), self.tmp_path('new')
        old.mkdir()
        new.mkdir()

        _l = """# -*- coding: utf-8 -*-
Abkhaz-Adyge [abkh1242]
    Ubykh [ubyk1235][uby]
Abkhaz-Adyge [abkh1242], Abkhaz-Abaza [abkh1243]
    Abaza [abaz1241][abq]
    Abkhazian [abkh1244][abk]
Abkhaz-Adyge [abkh1242], Circassian [circ1239]
    Adyghe [adyg1241][ady]
    Kabardian [kaba1278][kbd]
"""

        _d = """# -*- coding: utf-8 -*-
Abaza [abaz1241]
    Ashkaraua [ashk1247][]
    Bezshagh [bezs1238][]
    Tapanta [tapa1256][]
Abkhazian [abkh1244]
    Abzhui [abzh1238][]
    Bzyb [bzyb1238][]
    Samurzakan [samu1242][]
"""

        def lffs():
            return {Level.language: lff(_l), Level.dialect: lff(_d)}

        lff2tree(old, builddir=self.tmp_path('build1'), lffs=lffs())
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs())
        tree2lff(new,
                 out_paths={
                     Level.language: self.tmp_path('lff'),
                     Level.dialect: self.tmp_path('dff')
                 })
        with self.tmp_path('lff').open() as fp:
            self.assertEqual(fp.read(), _l)

        with self.tmp_path('dff').open() as fp:
            self.assertEqual(fp.read(), _d)

        lffs_ = {
            Level.language: lff(_l.replace('Abaza', 'Abazul')),
            Level.dialect: lff(_d)
        }
        lff2tree(old, new, builddir=self.tmp_path('build2'), lffs=lffs_)
        l = Languoid.from_dir(new.joinpath('abkh1242', 'abkh1243', 'abaz1241'))
        self.assertEqual(l.name, 'Abazul')
        self.assertEqual(l.parent.name, 'Abkhaz-Abazul')
Ejemplo n.º 17
0
def create(args):
    """Create a new languoid directory for a languoid specified by name and level.

    glottolog create <parent> <name> <level> [<key>=<value> ...]

    The languoid is created in the directory of the parent languoid if one is
    found for <parent>, otherwise directly in the tree directory of the
    repository. Additional ``key=value`` arguments are passed as keyword
    arguments to ``Languoid.from_name_id_level``.
    """
    assert args.args[2] in ['family', 'language', 'dialect']
    parent = args.repos.languoid(args.args[0]) or None
    outdir = parent.dir if parent else args.repos.tree

    # Split on the first '=' only, so property values may contain '='.
    # Parsed before a new glottocode is requested, so malformed properties
    # are reported first.
    props = dict(prop.split('=', 1) for prop in args.args[3:])

    lang = Languoid.from_name_id_level(
        outdir, args.args[1], args.repos.glottocodes.new(args.args[1]),
        getattr(Level, args.args[2]),
        **props)

    print("Info written to %s" % lang.write_info(outdir=outdir))
Ejemplo n.º 18
0
 def test_attrs(self):
     """The name of a new languoid can be changed; assigning to
     ``glottocode`` or ``id`` raises ``AttributeError``."""
     lang = Languoid.from_name_id_level(
         self.api.tree, 'name', 'abcd1235', Level.language, hid='NOCODE')

     lang.name = 'other'
     self.assertEqual(lang.name, 'other')

     # Both spellings of the identifier reject assignment.
     for attr in ('glottocode', 'id'):
         with self.assertRaises(AttributeError):
             setattr(lang, attr, 'x')

     self.assertEqual(lang.id, lang.glottocode)
     self.assertEqual(lang.hid, 'NOCODE')
Ejemplo n.º 19
0
def lang2tree(api, log, lang, lineage, out, old_tree):
    """Create the directories for ``lang`` and its lineage below ``out``.

    Each item of ``lineage`` is ``(name, id, level)`` with an optional fourth
    element ``hid``. For every item a nested directory named after the id is
    created if missing; only then is the group's info written. The group is
    taken from ``old_tree`` when known (level and name are updated, a level
    change is logged), otherwise created with ``Languoid.from_name_id_level``.
    If a ``hid`` was given, it is stored as the group's hid and, when it is
    ``None`` or contained in ``api.iso``, as its iso code as well.

    Finally the directory for ``lang`` is created and its info written,
    reusing the object from ``old_tree`` (with level, name, iso and hid
    updated from ``lang``) when available.

    :param api: object providing ``tree`` and ``iso``.
    :param log: logger; ``info`` is called for level changes.
    :param lang: languoid to place in the tree.
    :param lineage: iterable of 3- or 4-tuples, outermost group first.
    :param out: directory (path object) below which the tree is built.
    :param old_tree: mapping of id to previously known languoid objects.
    """
    current = out

    for spec in lineage:
        name, id_, level = spec[:3]
        # -1 marks "no hid given"; None is a meaningful hid value below.
        hid = spec[3] if len(spec) == 4 else -1

        current = current.joinpath(id_)
        if current.exists():
            # Group directory already present: nothing is written for it.
            continue
        current.mkdir()

        if id_ not in old_tree:
            node = Languoid.from_name_id_level(api.tree, name, id_, level)
        else:
            node = old_tree[id_]
            if node.level != level:
                log.info('{0} from {1} to {2}'.format(
                    node, node.level, level))
                node.level = level
            if name != node.name:
                # rename a subgroup!
                node.name = name

        if hid != -1:
            usable_as_iso = hid in api.iso or hid is None
            if usable_as_iso and node.iso != hid:
                node.iso = hid
            if hid != node.hid:
                node.hid = hid
        node.write_info(current)

    langdir = current.joinpath(lang.id)
    langdir.mkdir()

    if lang.id not in old_tree:
        lang.write_info(langdir)
        return

    # Carry over the known languoid, updated from the new data.
    known = old_tree[lang.id]
    if known.level != lang.level:
        log.info('{0} from {1} to {2}'.format(known, known.level,
                                              lang.level))
        known.level = lang.level
    if known.name != lang.name:
        known.name = lang.name
    if known.iso != lang.iso:
        known.iso = lang.iso
    if lang.hid and known.hid != lang.hid:
        known.hid = lang.hid
    known.write_info(langdir)
Ejemplo n.º 20
0
def new_languoid(args):
    """Create a new languoid directory for a languoid specified by name and level.

    glottolog new_languoid <name> <level> [<key>=<value> ...]

    Additional ``key=value`` arguments are passed as keyword arguments to
    ``Languoid.from_name_id_level``.
    """
    assert args.args[1] in ['family', 'language', 'dialect']

    # Split on the first '=' only, so property values may contain '='.
    # Parsed before a glottocode is derived from the name, so malformed
    # properties are reported first.
    props = dict(prop.split('=', 1) for prop in args.args[2:])

    lang = Languoid.from_name_id_level(
        args.args[0],
        Glottocode.from_name(args.args[0]),
        args.args[1],
        **props)
    #
    # FIXME: how to specify parent? Just mv there?
    #
    print("Info written to %s" % lang.write_info())
Ejemplo n.º 21
0
def languoid(api, log, new, path, lname, glottocode, isocode, level):
    """Build a languoid and its lineage from parsed classification data.

    Missing glottocodes, for the languoid itself as well as for the items of
    ``path``, are looked up in ``new`` by ``(name, level)`` and, if not found
    there, requested from ``api.glottocodes.new`` and recorded in ``new``.

    The level of a lineage item is ``Level.family``, unless ``level`` is
    ``Level.dialect``: then the first item is a language and all further items
    are dialects. An item with ``ISOLATE_ID`` ends the lineage and is only
    valid as the single item of ``path``.

    :param api: object providing ``glottocodes``, ``tree`` and ``iso``.
    :param log: logger; ``error`` is called for an invalid isolate line.
    :param new: mapping of ``(name, level)`` to newly assigned glottocodes; \
    updated in place.
    :param path: sequence of ``(name, id, hid)`` triples, or a falsy value.
    :param lname: name of the languoid.
    :param glottocode: glottocode of the languoid, or a falsy value.
    :param isocode: stored as hid, and as iso code if it is ``None`` or \
    contained in ``api.iso``.
    :param level: level of the languoid.
    :return: pair of the languoid and its lineage, a list of \
    ``(name, id, level, hid)`` tuples.
    :raises ValueError: for an isolate item that is not the only path item.
    """
    own_key = (lname, level)
    glottocode = glottocode or new.get(own_key)
    if not glottocode:
        glottocode = api.glottocodes.new(lname)
        new[own_key] = glottocode

    lineage = []
    for i, (name, id_, hid) in enumerate(path or ()):
        if id_ == ISOLATE_ID:
            if i != 0 or len(path) != 1:
                log.error(
                    'invalid classification line for languoid: {0} [{1}]'.
                    format(lname, glottocode))
                raise ValueError('invalid isolate line')
            break

        if level == Level.dialect:
            # Below a language everything but the first item is a dialect.
            _level = Level.language if i == 0 else Level.dialect
        else:
            _level = Level.family

        group_key = (name, _level)
        id_ = id_ or new.get(group_key)
        if not id_:
            id_ = api.glottocodes.new(name)
            new[group_key] = id_

        lineage.append((name, id_, _level, hid))

    lang = Languoid.from_name_id_level(
        api.tree,
        lname,
        glottocode,
        level,
        # The languoid itself gets the lineage without the hids.
        lineage=[entry[:3] for entry in lineage])
    if (isocode in api.iso) or (isocode is None):
        lang.iso = isocode
    lang.hid = isocode
    return lang, lineage
Ejemplo n.º 22
0
def test_isolate(api):
    """A languoid loaded from ``isol1234`` is an isolate without parent or
    family."""
    isolate = Languoid.from_dir(api.tree / 'isol1234')
    assert isolate.isolate
    assert isolate.parent is None
    assert isolate.family is None
Ejemplo n.º 23
0
 def test_isolate(self):
     """A languoid loaded from ``isol1234`` is an isolate without parent
     or family."""
     isolate_dir = self.api.tree.joinpath('isol1234')
     isolate = Languoid.from_dir(isolate_dir)
     self.assertTrue(isolate.isolate)
     self.assertIsNone(isolate.parent)
     self.assertIsNone(isolate.family)
Ejemplo n.º 24
0
def test_factory_without_api(api_copy):
    """Load a family with ``_api`` and a child language without it; the
    child's macroareas are expected to be empty."""
    family = Languoid.from_dir(api_copy.tree / 'abcd1234', _api=api_copy)
    lang = Languoid.from_dir(api_copy.tree / family.id / 'abcd1235')
    # No API passed at initialization => no macroareas!
    assert len(lang.macroareas) == 0