Beispiel #1
0
    def test_wl2qlc(self):
        """Write wordlist and alignment data with ``wl2qlc``.

        Only the first write is asserted on (the file must end with the
        stamp); the remaining calls just have to run without raising.
        """
        stamp = 'test-stamp'
        target = self.tmp_path('test')

        wl2qlc(self.wordlist.header, self.wordlist._data,
               filename=target.as_posix(), stamp=stamp)
        # The result is read back from 'test.qlc', not from 'test'.
        target = self.tmp_path('test.qlc')

        with target.open(encoding='utf8') as handle:
            written = handle.read()
        self.assertTrue(written.endswith(stamp))

        # load a wordlist with alignments and output it as string with
        # msapairs
        alms = Alignments(test_data('good_file.tsv'), ref='cogid')
        alms.align(ref="cogid")

        wl2qlc(alms.header, alms._data, meta=alms._meta,
               filename=target.as_posix(), stamp='stampo', ignore=[])
        alms.get_consensus(ref="cogid")

        # Same data again, with upper-cased header names (sorted by their
        # values in ``alms.header``), once per formatter setting.
        for formatter in ("doculect,concept", "doculect"):
            ordered = sorted(alms.header, key=lambda col: alms.header[col])
            upper_header = [name.upper() for name in ordered]
            wl2qlc(upper_header, alms._data, meta=alms._meta,
                   filename=target.as_posix(), stamp='stampo', ignore=[],
                   formatter=formatter)
Beispiel #2
0
    def test_wl2qlc(self):
        """Exercise ``wl2qlc`` on the plain wordlist and on aligned data.

        The first write is checked for the trailing stamp; the later calls
        are smoke tests with no assertions.
        """
        stamp = 'test-stamp'
        qlc_path = self.tmp_path('test')

        wl2qlc(self.wordlist.header, self.wordlist._data,
               filename=qlc_path.as_posix(), stamp=stamp)
        # Read the output back under the name 'test.qlc'.
        qlc_path = self.tmp_path('test.qlc')

        with qlc_path.open(encoding='utf8') as fp:
            content = fp.read()
        self.assertTrue(content.endswith(stamp))

        # load a wordlist with alignments and output it as string with
        # msapairs
        aligned = Alignments(test_data('good_file.tsv'), ref='cogid')
        aligned.align(ref="cogid")

        wl2qlc(aligned.header, aligned._data, meta=aligned._meta,
               filename=qlc_path.as_posix(), stamp='stampo', ignore=[])
        aligned.get_consensus(ref="cogid")

        # Write once per formatter, using upper-cased header names sorted
        # by their values in ``aligned.header``.
        for formatter in ("doculect,concept", "doculect"):
            by_value = sorted(aligned.header,
                              key=lambda col: aligned.header[col])
            wl2qlc([col.upper() for col in by_value],
                   aligned._data,
                   meta=aligned._meta,
                   filename=qlc_path.as_posix(),
                   stamp='stampo',
                   ignore=[],
                   formatter=formatter)
Beispiel #3
0
def test_wl2qlc(tmppath, test_data, wordlist):
    """Write wordlist and alignment data with ``wl2qlc``.

    The first write is checked for the trailing stamp; the remaining calls
    only have to complete without raising.
    """
    stamp = 'test-stamp'
    target = tmppath / 'test'

    wl2qlc(wordlist.header, wordlist._data, filename=str(target), stamp=stamp)
    # The output is read back from 'test.qlc', not from 'test'.
    target = tmppath / 'test.qlc'
    written = target.read_text(encoding='utf8')
    assert written.endswith(stamp)

    # load a wordlist with alignments and output it as string with msapairs
    alms = Alignments(str(test_data / 'good_file.tsv'), ref='cogid')
    alms.align(ref="cogid")

    wl2qlc(alms.header, alms._data, meta=alms._meta, filename=str(target),
           stamp='stampo', ignore=[])
    alms.get_consensus(ref="cogid")

    # Same data again with upper-cased header names (sorted by their values
    # in ``alms.header``), once per formatter setting.
    for formatter in ("doculect,concept", "doculect"):
        ordered = sorted(alms.header, key=lambda col: alms.header[col])
        upper_header = [name.upper() for name in ordered]
        wl2qlc(upper_header, alms._data, meta=alms._meta,
               filename=target.as_posix(), stamp='stampo', ignore=[],
               formatter=formatter)
Beispiel #4
0
 def setUp(self):
     """Run the base-class set-up, then load and align ``KSL2.qlc``."""
     WithTempDir.setUp(self)
     source = test_data('KSL2.qlc')
     self.alm = Alignments(source, loans=False, _interactive=False)
     self.alm.align()
Beispiel #5
0
class TestAlignments(WithTempDir):
    """Exercise ``Alignments`` loaded from the ``KSL2.qlc`` test file."""

    def setUp(self):
        """Run the base-class set-up, then load and align the test data."""
        WithTempDir.setUp(self)
        self.alm = Alignments(test_data('KSL2.qlc'),
                              loans=False,
                              _interactive=False)
        self.alm.align()

    def test_ipa2tokens(self):
        """Re-tokenizing each 'ipa' entry must reproduce the stored tokens."""
        for key in self.alm:
            ipa = self.alm[key, 'ipa']
            tokens_a = self.alm[key, 'tokensa'].split(' ')
            tokens_b = self.alm[key, 'tokensb'].split(' ')

            # 'tokensa' is checked against merge_vowels=True,
            # 'tokensb' against merge_vowels=False.
            new_tokens_a = lp.ipa2tokens(ipa,
                                         merge_vowels=True,
                                         merge_geminates=False)
            new_tokens_b = lp.ipa2tokens(ipa,
                                         merge_vowels=False,
                                         merge_geminates=False)
            assert tokens_a == new_tokens_a
            assert tokens_b == new_tokens_b

    def test_align(self):
        """Compare alignments across references and across alignment APIs."""
        # 'cugid' holds the values of the reference column converted with
        # text_type, hence the string key "1" in the comparison below.
        self.alm.add_entries('cugid', self.alm._ref, lambda x: text_type(x))
        self.alm.add_alignments(ref="cugid")

        # align all sequences using standard params
        self.alm.align(ref="cugid", alignment="alignment2")
        assert (self.alm.msa["cugid"]["1"]["ID"] ==
                self.alm.msa["cogid"][1]["ID"])

        # iterate and align using the multiple function
        for value in self.alm.msa['cogid'].values():
            # first compare simple alignments
            msa_a = lp.SCA(value)
            msa_b = lp.Multiple(value['seqs'])
            msa_b.prog_align()
            assert msa_a == msa_b

            # now compare with different flag
            msa_a = lp.Multiple(
                [self.alm[idx, 'tokensb'] for idx in value['ID']])
            msa_b = lp.Multiple([''.join(s) for s in value['seqs']],
                                merge_vowels=False)
            msa_a.lib_align()
            msa_b.lib_align()
            assert msa_a == msa_b

    def test_get_consensus(self):
        """Turkish consensus strings must equal the joined Turkish tokens."""
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        consensus = self.alm.get_list(language="Turkish", entry="consensus",
                                      flat=True)
        tokens = self.alm.get_list(language="Turkish", entry="tokens",
                                   flat=True)
        self.assertEqual(consensus, [''.join(x) for x in tokens])

    def test_get_confidence(self):
        """Run ``get_confidence`` and write HTML output with confidence."""
        lex = LexStat(test_data('KSL3.qlc'))
        numbers = {key: lex[key, 'numbers'] for key in lex}
        self.alm.add_entries('numbers', numbers, lambda x: x)
        # The return value is not inspected, so it is not bound to a name.
        # NOTE(review): presumably output(..., confidence=True) relies on
        # state set by this call -- confirm.
        self.alm.get_confidence(lex.rscorer, ref='cogid')
        self.alm.output('html',
                        filename=text_type(self.tmp_path('alm')),
                        confidence=True)

    def test_output(self):
        """Write the alignments as 'tsv' and as 'html' (no assertions)."""
        self.alm.output('tsv', filename=text_type(self.tmp_path('test')))
        self.alm.output('html', filename=text_type(self.tmp_path('test')))
Beispiel #6
0
 def setUp(self):
     """Run the base-class set-up and load ``KSL2.qlc`` as ``Alignments``."""
     WithTempDir.setUp(self)
     source = test_data('KSL2.qlc')
     self.alm = Alignments(source, loans=False, _interactive=False)
Beispiel #7
0
class TestAlignments(WithTempDir):
    """Tests for ``Alignments`` built from the ``KSL2.qlc`` test file."""

    def setUp(self):
        """Run the base-class set-up and load the (unaligned) test data."""
        WithTempDir.setUp(self)
        source = test_data('KSL2.qlc')
        self.alm = Alignments(source, loans=False, _interactive=False)

    def test_ipa2tokens(self):
        """Stored token columns must match a fresh ``ipa2tokens`` run."""
        for idx in self.alm:
            ipa = self.alm[idx, 'ipa']
            stored_merged = self.alm[idx, 'tokensa'].split(' ')
            stored_unmerged = self.alm[idx, 'tokensb'].split(' ')

            computed_merged = lp.ipa2tokens(ipa, merge_vowels=True)
            computed_unmerged = lp.ipa2tokens(ipa, merge_vowels=False)
            assert stored_merged == computed_merged
            assert stored_unmerged == computed_unmerged

    def test_align(self):
        """Alignments from ``align`` must match direct ``Multiple`` runs."""
        # align all sequences using standard params
        self.alm.align()

        # iterate and align using the multiple function
        for _, msa in self.alm.msa['cogid'].items():
            # first compare simple alignments
            from_sca = lp.SCA(msa)
            from_seqs = lp.Multiple(msa['seqs'])
            from_seqs.prog_align()
            assert from_sca == from_seqs

            # now compare with different flag
            token_rows = [self.alm[idx, 'tokensb'] for idx in msa['ID']]
            joined_rows = [''.join(seq) for seq in msa['seqs']]
            from_tokens = lp.Multiple(token_rows)
            from_strings = lp.Multiple(joined_rows, merge_vowels=False)
            from_tokens.lib_align()
            from_strings.lib_align()
            assert from_tokens == from_strings

    def test_get_consensus(self):
        """Turkish consensus strings must equal the joined Turkish tokens."""
        # align all sequences using standard params
        self.alm.align()

        # One child node per line of the test file, named after the
        # second tab-separated field of that line.
        leaves = [TreeNode(Name=line.split('\t')[1])
                  for line in read_config_file(test_data('KSL2.qlc'))]
        tree = TreeNode(Name='root', Children=leaves)

        self.alm.get_consensus(consensus="consensus", tree=tree)
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        consensus = self.alm.get_list(language="Turkish", entry="consensus",
                                      flat=True)
        tokens = self.alm.get_list(language="Turkish", entry="tokens",
                                   flat=True)
        assert consensus == [''.join(tok) for tok in tokens]

    def test_output(self):
        """Align, then write 'qlc' and 'html' output (no assertions)."""
        self.alm.align()
        for fmt in ('qlc', 'html'):
            self.alm.output(fmt, filename=text_type(self.tmp_path('test')))
Beispiel #8
0
class TestAlignments(WithTempDir):
    """Tests for an aligned ``Alignments`` built from ``KSL2.qlc``."""

    def setUp(self):
        """Run the base-class set-up, then load and align the test data."""
        WithTempDir.setUp(self)
        source = test_data('KSL2.qlc')
        self.alm = Alignments(source, loans=False, _interactive=False)
        self.alm.align()

    def test_ipa2tokens(self):
        """Stored token columns must match a fresh ``ipa2tokens`` run."""
        for idx in self.alm:
            ipa = self.alm[idx, 'ipa']
            stored_merged = self.alm[idx, 'tokensa'].split(' ')
            stored_unmerged = self.alm[idx, 'tokensb'].split(' ')

            computed_merged = lp.ipa2tokens(
                ipa, merge_vowels=True, merge_geminates=False)
            computed_unmerged = lp.ipa2tokens(
                ipa, merge_vowels=False, merge_geminates=False)
            assert stored_merged == computed_merged
            assert stored_unmerged == computed_unmerged

    def test_align(self):
        """Compare alignments across references and across alignment APIs."""
        # 'cugid' holds the reference column converted with text_type,
        # hence the string key "1" in the comparison below.
        self.alm.add_entries('cugid', self.alm._ref, lambda x: text_type(x))
        self.alm.add_alignments(ref="cugid")

        # align all sequences using standard params
        self.alm.align(ref="cugid", alignment="alignment2")
        cugid_ids = self.alm.msa["cugid"]["1"]["ID"]
        cogid_ids = self.alm.msa["cogid"][1]["ID"]
        assert cugid_ids == cogid_ids

        # iterate and align using the multiple function
        for _, msa in self.alm.msa['cogid'].items():
            # first compare simple alignments
            from_sca = lp.SCA(msa)
            from_seqs = lp.Multiple(msa['seqs'])
            from_seqs.prog_align()
            assert from_sca == from_seqs

            # now compare with different flag
            token_rows = [self.alm[idx, 'tokensb'] for idx in msa['ID']]
            joined_rows = [''.join(seq) for seq in msa['seqs']]
            from_tokens = lp.Multiple(token_rows)
            from_strings = lp.Multiple(joined_rows, merge_vowels=False)
            from_tokens.lib_align()
            from_strings.lib_align()
            assert from_tokens == from_strings

    def test_get_consensus(self):
        """Turkish consensus strings must equal the joined Turkish tokens."""
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        consensus = self.alm.get_list(language="Turkish", entry="consensus",
                                      flat=True)
        tokens = self.alm.get_list(language="Turkish", entry="tokens",
                                   flat=True)
        self.assertEqual(consensus, [''.join(tok) for tok in tokens])

    def test_get_confidence(self):
        """Run ``get_confidence`` and write HTML output with confidence."""
        lex = LexStat(test_data('KSL3.qlc'))
        numbers = {key: lex[key, 'numbers'] for key in lex}
        self.alm.add_entries('numbers', numbers, lambda x: x)
        # Run get_confidence to populate the output variable.
        # TODO: Check and document side-effects of this.
        self.alm.get_confidence(lex.rscorer, ref='cogid')
        self.alm.output('html', filename=text_type(self.tmp_path('alm')),
                        confidence=True)

    def test_output(self):
        """Write the alignments as 'tsv' and as 'html' (no assertions)."""
        for fmt in ('tsv', 'html'):
            self.alm.output(fmt, filename=text_type(self.tmp_path('test')))
Beispiel #9
0
def alm(test_data):
    """Return an aligned ``Alignments`` instance loaded from ``KSL2.qlc``."""
    source = str(test_data / 'KSL2.qlc')
    aligned = Alignments(source, loans=False, _interactive=False)
    aligned.align()
    return aligned
Beispiel #10
0
class TestAlignments(WithTempDir):
    """Tests for ``Alignments`` built from the ``KSL2.qlc`` test file."""

    def setUp(self):
        """Run the base-class set-up and load the (unaligned) test data."""
        WithTempDir.setUp(self)
        qlc_file = test_data('KSL2.qlc')
        self.alm = Alignments(qlc_file, loans=False, _interactive=False)

    def test_ipa2tokens(self):
        """Each stored token column must equal the re-tokenized 'ipa'."""
        for entry in self.alm:
            ipa = self.alm[entry, 'ipa']
            expected_a = self.alm[entry, 'tokensa'].split(' ')
            expected_b = self.alm[entry, 'tokensb'].split(' ')

            actual_a = lp.ipa2tokens(ipa, merge_vowels=True)
            actual_b = lp.ipa2tokens(ipa, merge_vowels=False)
            assert expected_a == actual_a
            assert expected_b == actual_b

    def test_align(self):
        """Alignments from ``align`` must match direct ``Multiple`` runs."""
        # align all sequences using standard params
        self.alm.align()

        # iterate and align using the multiple function
        for _, cognate_set in self.alm.msa['cogid'].items():
            # first compare simple alignments
            sca = lp.SCA(cognate_set)
            progressive = lp.Multiple(cognate_set['seqs'])
            progressive.prog_align()
            assert sca == progressive

            # now compare with different flag
            tokens = [self.alm[idx, 'tokensb'] for idx in cognate_set['ID']]
            strings = [''.join(seq) for seq in cognate_set['seqs']]
            lib_tokens = lp.Multiple(tokens)
            lib_strings = lp.Multiple(strings, merge_vowels=False)
            lib_tokens.lib_align()
            lib_strings.lib_align()
            assert lib_tokens == lib_strings

    def test_get_consensus(self):
        """Turkish consensus strings must equal the joined Turkish tokens."""
        # align all sequences using standard params
        self.alm.align()

        # One child node per line of the test file, named after the
        # second tab-separated field of that line.
        children = [
            TreeNode(Name=line.split('\t')[1])
            for line in read_config_file(test_data('KSL2.qlc'))
        ]
        tree = TreeNode(Name='root', Children=children)

        self.alm.get_consensus(consensus="consensus", tree=tree)
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        turkish_consensus = self.alm.get_list(
            language="Turkish", entry="consensus", flat=True)
        turkish_tokens = self.alm.get_list(
            language="Turkish", entry="tokens", flat=True)
        assert turkish_consensus == [''.join(t) for t in turkish_tokens]

    def test_output(self):
        """Align, then write 'qlc' and 'html' output (no assertions)."""
        self.alm.align()
        for fmt in ('qlc', 'html'):
            self.alm.output(fmt, filename=text_type(self.tmp_path('test')))