Esempio n. 1
0
def elcat_diff(args):  # pragma: no cover
    """Print how the records returned by the endangeredlanguages reader match languoids.

    Every record yielded by ``pyglottolog.links.endangeredlanguages.read()`` is put
    into exactly one category; the first rule that applies wins:

    - ``multiple``: the record lists more than one ISO code (reported on stdout).
    - ``none``: the record lists no ISO code (reported on stdout).
    - ``iso match``: its single ISO code is the ``iso`` of some languoid.
    - ``LL match``: its single ISO code is a key of the multitree identifier index.
    - ``AES match``: its ``id`` was found in the endangerment comment of a languoid
      whose endangerment source id is ``'ElCat'``.
    - ``name match``: its name is longer than 5 characters and equals a languoid name.
    - ``no match``: none of the above (reported on stdout).

    Finally the per-category counts are printed, most common first, followed by the
    total number of records processed.

    :param args: Object whose ``repos.languoids()`` yields the languoids to compare \
    against.
    """
    from pyglottolog.links.endangeredlanguages import read

    languoids = list(args.repos.languoids())
    gl_isos = {lang.iso for lang in languoids if lang.iso}
    gl_names = {lang.name for lang in languoids}

    # Raw string: "\(" and "\-" are not valid escapes in a plain string literal.
    elcat_id_pattern = re.compile(r'\((?P<id>[0-9]+)\-')

    # Languoids with an ElCat endangerment source, keyed by the integer that follows
    # the first "(" of a "(<digits>-" fragment in the endangerment comment.
    aes = {}
    for lang in languoids:
        if lang.endangerment and lang.endangerment.source.id == 'ElCat':
            m = elcat_id_pattern.search(lang.endangerment.comment or '')
            if m:
                aes[int(m.group('id'))] = lang

    # Languoids keyed by their multitree identifier (later languoids win on clashes).
    in_gl = {
        lang.identifier['multitree']: lang
        for lang in languoids
        if lang.identifier.get('multitree')
    }

    counts = Counter()
    for record in read():
        if len(record.isos) > 1:
            print('+++ multiple codes: {0.name} [{0.id}][{0.isos}]'.format(record))
            counts['multiple'] += 1
            continue
        if not record.isos:
            print('--- no codes: {0.name} [{0.id}]'.format(record))
            counts['none'] += 1
            continue

        iso = record.isos[0]
        if iso in gl_isos:
            counts['iso match'] += 1
            continue

        # NOTE(review): in_gl is keyed by multitree identifiers but probed with an ISO
        # code - confirm the two code spaces are meant to coincide.
        if iso in in_gl:
            counts['LL match'] += 1
            continue

        # NOTE(review): aes keys are ints, so this only hits if record.id is an int
        # as well - confirm against the reader.
        if record.id in aes:
            counts['AES match'] += 1
            continue

        # Short names are skipped; presumably they are too ambiguous to trust.
        if len(record.name) > 5 and record.name in gl_names:
            counts['name match'] += 1
            continue

        print('~~~ no match: {0.name} [{0.id}][{0.isos}]'.format(record))
        counts['no match'] += 1

    for category, number in counts.most_common():
        print(category, number)
    print(sum(counts.values()))
Esempio n. 2
0
def test_el(elcat):
    """Check the shape of what ``endangeredlanguages.read()`` returns.

    Expects exactly one record, carrying a single coordinate and a URL ending in '1'.
    The ``elcat`` fixture is defined outside this view; presumably it provides the
    data that ``read()`` consumes.
    """
    records = endangeredlanguages.read()
    assert len(records) == 1

    only_record = records[0]
    assert len(only_record.coordinates) == 1
    assert only_record.url.endswith('1')