Esempio n. 1
0
class Tests(WithTempDir):
    """Tests for the evaluation functions imported from lingpy.evaluate.acd."""

    def setUp(self):
        """Create the LexStat and Partial wordlists shared by all tests."""
        WithTempDir.setUp(self)
        self.lex = LexStat(test_data('KSL.qlc'))
        self.part = Partial(test_data('partial_cognates.tsv'),
                            segments='segments')
        # 'pid1' copies 'partial_cognate_sets' unchanged; 'pid2' parses the
        # space-separated values of 'partialids2' into a list of ints.
        self.part.add_entries('pid1', 'partial_cognate_sets', lambda x: x)
        self.part.add_entries('pid2', 'partialids2',
                              lambda x: [int(y) for y in x.split(' ')])

    def test_bcubes(self):
        """bcubes scores 'cogid' against the default gold column as perfect."""
        from lingpy.evaluate.acd import bcubes

        res = bcubes(self.lex, test='cogid', pprint=False)
        # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
        # (removed in Python 3.12). For tuples it only passes when both sides
        # compare equal, because tuples cannot be subtracted.
        self.assertAlmostEqual(res, (1.0, 1.0, 1.0))

        # Smoke test: the pretty-printed, per-concept variant must not raise.
        bcubes(self.lex, 'cogid', 'cogid', pprint=True, per_concept=True)

    def test_partial_bcubes(self):
        """partial_bcubes yields the expected scores for 'pid1' vs 'pid2'."""
        from lingpy.evaluate.acd import partial_bcubes

        res = partial_bcubes(self.part, 'pid1', 'pid2', pprint=False)
        assert [round(x, 2) for x in res] == [0.92, 0.98, 0.95]

        # Smoke test: the pretty-printed variant must not raise.
        partial_bcubes(self.part, 'pid1', 'pid2', pprint=True)

    def test_pairs(self):
        """pairs scores 'cogid' against the default gold column as perfect."""
        from lingpy.evaluate.acd import pairs

        res = pairs(self.lex, test='cogid', pprint=False)
        self.assertAlmostEqual(res, (1.0, 1.0, 1.0))

    def test_diff(self):
        """diff is perfect for identical columns and not for a modified one."""
        from lingpy.evaluate.acd import diff

        res = diff(self.lex, test='cogid', tofile=False, pprint=False)
        self.assertAlmostEqual(res, ((1.0, 1.0, 1.0), (1.0, 1.0, 1.0)))

        # Derive a deliberately different column: odd ids are incremented by
        # one, even ids are squared.
        self.lex.add_entries('cugid', 'cogid', lambda x: x + 1
                             if x % 2 else x * x)

        # These two calls are only exercised for errors (default tofile and
        # tofile=True); their return values are not inspected.
        diff(self.lex,
             gold='cogid',
             test='cogid',
             filename='%s' % self.tmp_path('test_acd'),
             pprint=False)
        d2 = diff(self.lex,
                  gold='cugid',
                  test='cogid',
                  filename='%s' % self.tmp_path('test_acd'),
                  pprint=False,
                  tofile=False)
        diff(self.lex,
             gold='cugid',
             test='cogid',
             filename='%s' % self.tmp_path('test_acd'),
             pprint=False,
             tofile=True)
        assert d2[0] != 1
Esempio n. 2
0
class Tests(WithTempDir):
    """Exercise the cognate evaluation helpers on LexStat/Partial fixtures."""

    def setUp(self):
        """Build the two wordlists that every test method relies on."""
        WithTempDir.setUp(self)
        self.lex = LexStat(test_data('KSL.qlc'))

        partial = Partial(test_data('partial_cognates.tsv'),
                          segments='segments')
        # 'pid1' mirrors the source column; 'pid2' turns the space-separated
        # source string into a list of ints.
        partial.add_entries('pid1', 'partial_cognate_sets', lambda x: x)
        partial.add_entries(
            'pid2',
            'partialids2',
            lambda x: [int(y) for y in x.split(' ')],
        )
        self.part = partial

    def test_bcubes(self):
        """Comparing 'cogid' via bcubes gives a perfect score triple."""
        perfect = (1.0, 1.0, 1.0)
        scores = bcubes(self.lex, test='cogid', pprint=False)
        self.assertAlmostEqual(scores, perfect)

        # Only checks that the printing, per-concept code path runs.
        bcubes(self.lex, 'cogid', 'cogid', pprint=True, per_concept=True)

    def test_partial_bcubes(self):
        """partial_bcubes on 'pid1'/'pid2' matches the known rounded scores."""
        scores = partial_bcubes(self.part, 'pid1', 'pid2', pprint=False)
        rounded = [round(score, 2) for score in scores]
        assert rounded == [0.92, 0.98, 0.95]

        # Only checks that the printing code path runs.
        partial_bcubes(self.part, 'pid1', 'pid2', pprint=True)

    def test_pairs(self):
        """Comparing 'cogid' via pairs gives a perfect score triple."""
        perfect = (1.0, 1.0, 1.0)
        scores = pairs(self.lex, test='cogid', pprint=False)
        self.assertAlmostEqual(scores, perfect)

    def test_diff(self):
        """diff is perfect on identical columns and imperfect on 'cugid'."""
        perfect = (1.0, 1.0, 1.0)
        baseline = diff(self.lex, test='cogid', tofile=False, pprint=False)
        self.assertAlmostEqual(baseline, (perfect, perfect))

        def scramble(cogid):
            # Odd ids move up by one, even ids are squared.
            if cogid % 2:
                return cogid + 1
            return cogid * cogid

        self.lex.add_entries('cugid', 'cogid', scramble)

        # Arguments common to the three remaining calls.
        shared = dict(test='cogid', pprint=False)

        diff(self.lex, gold='cogid',
             filename='%s' % self.tmp_path('test_acd'), **shared)
        d2 = diff(self.lex, gold='cugid',
                  filename='%s' % self.tmp_path('test_acd'),
                  tofile=False, **shared)
        diff(self.lex, gold='cugid',
             filename='%s' % self.tmp_path('test_acd'),
             tofile=True, **shared)

        assert d2[0] != 1

    def test_random_cognates(self):
        """random_cognates adds the requested reference column."""
        random_cognates(self.lex, ref='randomid')
        columns = self.lex.header
        assert 'randomid' in columns

    def test_extreme_cognates(self):
        """Lumper bias merges rows 1 and 2, splitter bias separates them."""
        extreme_cognates(self.lex, ref="lumperid", bias='lumper')
        first, second = self.lex[1, 'lumperid'], self.lex[2, 'lumperid']
        assert first == second

        extreme_cognates(self.lex, ref='splitterid', bias='splitter')
        first, second = self.lex[1, 'splitterid'], self.lex[2, 'splitterid']
        assert first != second

        # An empty bias must be rejected with ValueError.
        assert_raises(ValueError, extreme_cognates, self.lex, bias='')
Esempio n. 3
0
class Tests(WithTempDir):
    """Tests for bcubes, partial_bcubes, pairs, diff and the cognate generators."""

    def setUp(self):
        """Create the LexStat and Partial wordlists shared by all tests."""
        WithTempDir.setUp(self)
        self.lex = LexStat(test_data('KSL.qlc'))
        self.part = Partial(test_data('partial_cognates.tsv'),
                            segments='segments')
        # 'pid1' copies 'partial_cognate_sets' unchanged; 'pid2' parses the
        # space-separated values of 'partialids2' into a list of ints.
        self.part.add_entries('pid1', 'partial_cognate_sets', lambda x: x)
        self.part.add_entries('pid2', 'partialids2',
                              lambda x: [int(y) for y in x.split(' ')])

    def test_bcubes(self):
        """bcubes scores 'cogid' against the default gold column as perfect."""
        res = bcubes(self.lex, test='cogid', pprint=False)
        # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
        # (removed in Python 3.12). For tuples it only passes when both sides
        # compare equal, because tuples cannot be subtracted.
        self.assertAlmostEqual(res, (1.0, 1.0, 1.0))

        # Smoke test: the pretty-printed, per-concept variant must not raise.
        _ = bcubes(self.lex, 'cogid', 'cogid', pprint=True, per_concept=True)

    def test_partial_bcubes(self):
        """partial_bcubes yields the expected scores for 'pid1' vs 'pid2'."""
        res = partial_bcubes(self.part, 'pid1', 'pid2', pprint=False)
        assert [round(x, 2) for x in res] == [0.92, 0.98, 0.95]

        # Smoke test: the pretty-printed variant must not raise.
        _ = partial_bcubes(self.part, 'pid1', 'pid2', pprint=True)

    def test_pairs(self):
        """pairs scores 'cogid' against the default gold column as perfect."""
        res = pairs(self.lex, test='cogid', pprint=False)
        self.assertAlmostEqual(res, (1.0, 1.0, 1.0))

    def test_diff(self):
        """diff is perfect for identical columns and not for a modified one."""
        res = diff(self.lex, test='cogid', tofile=False, pprint=False)
        self.assertAlmostEqual(res, ((1.0, 1.0, 1.0), (1.0, 1.0, 1.0)))

        # Derive a deliberately different column: odd ids are incremented by
        # one, even ids are squared.
        self.lex.add_entries('cugid', 'cogid',
                             lambda x: x + 1 if x % 2 else x * x)

        # The first and third calls are only exercised for errors (default
        # tofile and tofile=True); only d2 is inspected.
        _ = diff(self.lex, gold='cogid', test='cogid',
                 filename='%s' % self.tmp_path('test_acd'), pprint=False)
        d2 = diff(self.lex, gold='cugid', test='cogid',
                  filename='%s' % self.tmp_path('test_acd'),
                  pprint=False, tofile=False)
        _ = diff(self.lex, gold='cugid', test='cogid',
                 filename='%s' % self.tmp_path('test_acd'),
                 pprint=False, tofile=True)

        assert d2[0] != 1

    def test_random_cognates(self):
        """random_cognates adds the requested 'randomid' column."""
        random_cognates(self.lex, ref='randomid')
        assert 'randomid' in self.lex.header

    def test_extreme_cognates(self):
        """Check lumper/splitter output on rows 1 and 2 and the bias check."""
        extreme_cognates(self.lex, ref="lumperid", bias='lumper')
        assert self.lex[1, 'lumperid'] == self.lex[2, 'lumperid']
        extreme_cognates(self.lex, ref='splitterid', bias='splitter')
        assert self.lex[1, 'splitterid'] != self.lex[2, 'splitterid']
        # An empty bias string must raise ValueError.
        assert_raises(ValueError, extreme_cognates, self.lex, bias='')
Esempio n. 4
0
class Tests(WithTempDir):
    """Tests for the evaluation functions imported from lingpy.evaluate.acd."""

    def setUp(self):
        """Create the LexStat and Partial wordlists shared by all tests."""
        WithTempDir.setUp(self)
        self.lex = LexStat(test_data('KSL.qlc'))
        self.part = Partial(test_data('partial_cognates.tsv'),
                            segments='segments')
        # 'pid1' copies 'partial_cognate_sets' unchanged; 'pid2' parses the
        # space-separated values of 'partialids2' into a list of ints.
        self.part.add_entries('pid1', 'partial_cognate_sets', lambda x: x)
        self.part.add_entries('pid2', 'partialids2',
                              lambda x: [int(y) for y in x.split(' ')])

    def test_bcubes(self):
        """bcubes scores 'cogid' against the default gold column as perfect."""
        from lingpy.evaluate.acd import bcubes

        res = bcubes(self.lex, test='cogid', pprint=False)
        # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
        # (removed in Python 3.12). For tuples it only passes when both sides
        # compare equal, because tuples cannot be subtracted.
        self.assertAlmostEqual(res, (1.0, 1.0, 1.0))

        # Smoke test: the pretty-printed, per-concept variant must not raise.
        bcubes(self.lex, 'cogid', 'cogid', pprint=True, per_concept=True)

    def test_partial_bcubes(self):
        """partial_bcubes yields the expected scores for 'pid1' vs 'pid2'."""
        from lingpy.evaluate.acd import partial_bcubes

        res = partial_bcubes(self.part, 'pid1', 'pid2', pprint=False)
        assert [round(x, 2) for x in res] == [0.92, 0.98, 0.95]

        # Smoke test: the pretty-printed variant must not raise.
        partial_bcubes(self.part, 'pid1', 'pid2', pprint=True)

    def test_pairs(self):
        """pairs scores 'cogid' against the default gold column as perfect."""
        from lingpy.evaluate.acd import pairs

        res = pairs(self.lex, test='cogid', pprint=False)
        self.assertAlmostEqual(res, (1.0, 1.0, 1.0))

    def test_diff(self):
        """diff is perfect for identical columns and not for a modified one."""
        from lingpy.evaluate.acd import diff

        res = diff(self.lex, test='cogid', tofile=False, pprint=False)
        self.assertAlmostEqual(res, ((1.0, 1.0, 1.0), (1.0, 1.0, 1.0)))

        # Derive a deliberately different column: odd ids are incremented by
        # one, even ids are squared.
        self.lex.add_entries('cugid', 'cogid',
                             lambda x: x + 1 if x % 2 else x * x)

        # The first and third calls are only exercised for errors (default
        # tofile and tofile=True); only d2 is inspected.
        diff(self.lex, gold='cogid', test='cogid',
             filename='%s' % self.tmp_path('test_acd'), pprint=False)
        d2 = diff(self.lex, gold='cugid', test='cogid',
                  filename='%s' % self.tmp_path('test_acd'),
                  pprint=False, tofile=False)
        diff(self.lex, gold='cugid', test='cogid',
             filename='%s' % self.tmp_path('test_acd'),
             pprint=False, tofile=True)
        assert d2[0] != 1
Esempio n. 5
0
def part(test_data):
    """Return a Partial wordlist with the 'pid1' and 'pid2' columns added.

    ``test_data`` is joined with the file name using ``/`` and the result is
    passed to ``Partial`` as a string.
    """
    source = test_data / 'partial_cognates.tsv'
    wordlist = Partial(str(source), segments='segments')

    def parse_ids(raw):
        # Split on single spaces and convert every piece to int.
        return [int(piece) for piece in raw.split(' ')]

    # 'pid1' is an unchanged copy of the source column.
    wordlist.add_entries('pid1', 'partial_cognate_sets', lambda x: x)
    wordlist.add_entries('pid2', 'partialids2', parse_ids)
    return wordlist
from lingpy import *
from lingpy.compare.partial import Partial

# Try the previously written 'hm-111-17.bin.tsv' first. If loading it fails,
# fall back to the raw 'hm-111-17.tsv', compute the scorer and write the
# result out under the name 'hm-111-17.bin' (presumably producing the
# '.bin.tsv' file that is loaded on the next run -- confirm).
try:
    part = Partial('hm-111-17.bin.tsv', segments='segments')
except Exception:
    # `except Exception` instead of a bare `except:` so that
    # KeyboardInterrupt and SystemExit still abort the script.
    part = Partial('hm-111-17.tsv', segments='segments')
    part.get_scorer(runs=10000)
    part.output('tsv', filename='hm-111-17.bin')

# NOTE(review): the original comment here read "manually correct error in
# data", but the statement below runs partial cognate clustering and stores
# the result in 'cogids' -- confirm whether a correction step is missing.
part.partial_cluster(method='lexstat',
                     cluster_method='infomap',
                     threshold=0.6,
                     ref='cogids')

# Add 'note' and 'morphemes' columns holding an empty string for every row.
part.add_entries('note', 'cogid', lambda x: '')
part.add_entries('morphemes', 'cogid', lambda x: '')
part.output('tsv', filename='hm-111-17-t06', ignore='all', prettify=False)
Esempio n. 7
0
from lingpy.compare.partial import Partial
from lingpy.convert.plot import plot_tree
from sys import argv
from clldutils.text import strip_brackets, split_text
from collections import defaultdict
from lingpy import basictypes

# Output prefix: the 'A_' files when 'all' is among the command-line
# arguments, the 'D_' files otherwise.
fname = '../output/A_Deepadung_' if 'all' in argv else '../output/D_Deepadung_'


def _cog_label(row, indices):
    """Concatenate the stringified first indexed cell with the second one."""
    first, second = indices[0], indices[1]
    return str(row[first]) + row[second]


part = Partial(fname + 'crossids.tsv')
part.add_cognate_ids('crossids', 'crossid', idtype='strict')

# Build the 'cog' column from 'crossid' and 'concept', then renumber it.
part.add_entries('cog', 'crossid,concept', _cog_label)
part.renumber('cog')

# Distances are computed on 'cogid'; the tree uses the 'neighbor' method.
part.calculate('distance', ref='cogid')
part.calculate('tree', tree_calc='neighbor')

# Write the distance matrix and the tree next to the input file.
for fmt, suffix in (('dst', 'distance'), ('tre', 'tree')):
    part.output(fmt, filename=fname + suffix)

# Plotting is optional and requested with 'plot' on the command line.
if 'plot' in argv:
    plot_tree(str(part.tree), degree=350, filename=fname + 'tree')