Beispiel #1
0
class TestAlignments(WithTempDir):
    """Tests for an ``Alignments`` object built from ``KSL2.qlc``."""

    def setUp(self):
        """Run the base-class setup and load a fresh ``Alignments`` object."""
        WithTempDir.setUp(self)
        self.alm = Alignments(
            test_data('KSL2.qlc'), loans=False, _interactive=False)

    def test_ipa2tokens(self):
        """Re-tokenizing each 'ipa' entry reproduces the stored tokens."""
        for key in self.alm:
            ipa = self.alm[key, 'ipa']
            # The reference segmentations are stored as space-separated
            # strings in the 'tokensa' and 'tokensb' columns.
            tokens_a = self.alm[key, 'tokensa'].split(' ')
            tokens_b = self.alm[key, 'tokensb'].split(' ')

            new_tokens_a = lp.ipa2tokens(
                ipa, merge_vowels=True, merge_geminates=False)
            new_tokens_b = lp.ipa2tokens(
                ipa, merge_vowels=False, merge_geminates=False)
            # unittest assertions are not stripped under ``python -O`` and
            # report both operands when the comparison fails.
            self.assertEqual(tokens_a, new_tokens_a)
            self.assertEqual(tokens_b, new_tokens_b)

    def test_align(self):
        """``SCA`` and ``Multiple`` yield equal results for each entry."""
        # align all sequences using standard params
        self.alm.align()

        # only the values of the 'cogid' mapping are needed here
        for value in self.alm.msa['cogid'].values():
            # first compare simple alignments
            msa_a = lp.SCA(value)
            msa_b = lp.Multiple(value['seqs'])
            msa_b.prog_align()
            self.assertEqual(msa_a, msa_b)

            # now compare with different flag
            msa_a = lp.Multiple(
                [self.alm[idx, 'tokensb'] for idx in value['ID']])
            msa_b = lp.Multiple(
                [''.join(s) for s in value['seqs']], merge_vowels=False)
            msa_a.lib_align()
            msa_b.lib_align()
            self.assertEqual(msa_a, msa_b)

    def test_get_consensus(self):
        """Turkish consensus strings equal the joined Turkish tokens."""
        # align all sequences using standard params
        self.alm.align()

        # Flat tree: one child node per line of the data file, named after
        # the second tab-separated field of that line.
        tree = TreeNode(
            Name='root',
            Children=[TreeNode(Name=line.split('\t')[1]) for line in
                      read_config_file(test_data('KSL2.qlc'))])

        # Exercise the three call variants; each writes to "consensus".
        self.alm.get_consensus(consensus="consensus", tree=tree)
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        self.assertEqual(
            self.alm.get_list(language="Turkish", entry="consensus", flat=True),
            [''.join(x) for x in
             self.alm.get_list(language="Turkish", entry="tokens", flat=True)])

    def test_output(self):
        """Write the aligned data as 'qlc' and 'html' into the temp dir."""
        self.alm.align()
        self.alm.output('qlc', filename=text_type(self.tmp_path('test')))
        self.alm.output('html', filename=text_type(self.tmp_path('test')))
Beispiel #2
0
class TestAlignments(WithTempDir):
    """Tests for an aligned ``Alignments`` object built from ``KSL2.qlc``."""

    def setUp(self):
        """Load the test data and align it once for every test."""
        WithTempDir.setUp(self)
        self.alm = Alignments(test_data('KSL2.qlc'),
                              loans=False,
                              _interactive=False)
        self.alm.align()

    def test_ipa2tokens(self):
        """Re-tokenizing each 'ipa' entry reproduces the stored tokens."""
        for key in self.alm:
            ipa = self.alm[key, 'ipa']
            # The reference segmentations are stored as space-separated
            # strings in the 'tokensa' and 'tokensb' columns.
            tokens_a = self.alm[key, 'tokensa'].split(' ')
            tokens_b = self.alm[key, 'tokensb'].split(' ')

            new_tokens_a = lp.ipa2tokens(ipa,
                                         merge_vowels=True,
                                         merge_geminates=False)
            new_tokens_b = lp.ipa2tokens(ipa,
                                         merge_vowels=False,
                                         merge_geminates=False)
            # unittest assertions are not stripped under ``python -O`` and
            # report both operands when the comparison fails.
            self.assertEqual(tokens_a, new_tokens_a)
            self.assertEqual(tokens_b, new_tokens_b)

    def test_align(self):
        """Align on a text-typed reference column and compare aligners."""
        # 'cugid' is derived from the default reference column by passing
        # each value through ``text_type``.
        self.alm.add_entries('cugid', self.alm._ref, text_type)
        self.alm.add_alignments(ref="cugid")

        # align all sequences using the new reference column
        self.alm.align(ref="cugid", alignment="alignment2")
        # Key "1" (text) under 'cugid' must list the same IDs as key 1
        # (integer) under 'cogid'.
        self.assertEqual(self.alm.msa["cugid"]["1"]["ID"],
                         self.alm.msa["cogid"][1]["ID"])

        # only the values of the 'cogid' mapping are needed here
        for value in self.alm.msa['cogid'].values():
            # first compare simple alignments
            msa_a = lp.SCA(value)
            msa_b = lp.Multiple(value['seqs'])
            msa_b.prog_align()
            self.assertEqual(msa_a, msa_b)

            # now compare with different flag
            msa_a = lp.Multiple(
                [self.alm[idx, 'tokensb'] for idx in value['ID']])
            msa_b = lp.Multiple([''.join(s) for s in value['seqs']],
                                merge_vowels=False)
            msa_a.lib_align()
            msa_b.lib_align()
            self.assertEqual(msa_a, msa_b)

    def test_get_consensus(self):
        """Turkish consensus strings equal the joined Turkish tokens."""
        # The sequences were already aligned in setUp.
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        self.assertEqual(
            self.alm.get_list(language="Turkish", entry="consensus",
                              flat=True),
            [
                ''.join(x) for x in self.alm.get_list(
                    language="Turkish", entry="tokens", flat=True)
            ])

    def test_get_confidence(self):
        """Run ``get_confidence`` and write HTML with ``confidence=True``."""
        lex = LexStat(test_data('KSL3.qlc'))
        # Copy the 'numbers' column of the LexStat object, keyed by row.
        numbers = {k: lex[k, 'numbers'] for k in lex}
        self.alm.add_entries('numbers', numbers, lambda x: x)
        # The return value is not inspected, so it is not bound to a name.
        self.alm.get_confidence(lex.rscorer, ref='cogid')
        self.alm.output('html',
                        filename=text_type(self.tmp_path('alm')),
                        confidence=True)

    def test_output(self):
        """Write the aligned data as 'tsv' and 'html' into the temp dir."""
        self.alm.output('tsv', filename=text_type(self.tmp_path('test')))
        self.alm.output('html', filename=text_type(self.tmp_path('test')))
Beispiel #3
0
class TestAlignments(WithTempDir):
    """Tests for an aligned ``Alignments`` object built from ``KSL2.qlc``."""

    def setUp(self):
        """Load the test data and align it once for every test."""
        WithTempDir.setUp(self)
        self.alm = Alignments(test_data('KSL2.qlc'), loans=False,
                              _interactive=False)
        self.alm.align()

    def test_ipa2tokens(self):
        """Re-tokenizing each 'ipa' entry reproduces the stored tokens."""
        for key in self.alm:
            ipa = self.alm[key, 'ipa']
            # The reference segmentations are stored as space-separated
            # strings in the 'tokensa' and 'tokensb' columns.
            tokens_a = self.alm[key, 'tokensa'].split(' ')
            tokens_b = self.alm[key, 'tokensb'].split(' ')

            new_tokens_a = lp.ipa2tokens(ipa, merge_vowels=True,
                                         merge_geminates=False)
            new_tokens_b = lp.ipa2tokens(ipa, merge_vowels=False,
                                         merge_geminates=False)
            # unittest assertions are not stripped under ``python -O`` and
            # report both operands when the comparison fails.
            self.assertEqual(tokens_a, new_tokens_a)
            self.assertEqual(tokens_b, new_tokens_b)

    def test_align(self):
        """Align on a text-typed reference column and compare aligners."""
        # 'cugid' is derived from the default reference column by passing
        # each value through ``text_type``.
        self.alm.add_entries('cugid', self.alm._ref, text_type)
        self.alm.add_alignments(ref="cugid")

        # align all sequences using the new reference column
        self.alm.align(ref="cugid", alignment="alignment2")
        # Key "1" (text) under 'cugid' must list the same IDs as key 1
        # (integer) under 'cogid'.
        self.assertEqual(self.alm.msa["cugid"]["1"]["ID"],
                         self.alm.msa["cogid"][1]["ID"])

        # only the values of the 'cogid' mapping are needed here
        for value in self.alm.msa['cogid'].values():
            # first compare simple alignments
            msa_a = lp.SCA(value)
            msa_b = lp.Multiple(value['seqs'])
            msa_b.prog_align()
            self.assertEqual(msa_a, msa_b)

            # now compare with different flag
            msa_a = lp.Multiple([self.alm[idx, 'tokensb']
                                 for idx in value['ID']])
            msa_b = lp.Multiple([''.join(s) for s in value['seqs']],
                                merge_vowels=False)
            msa_a.lib_align()
            msa_b.lib_align()
            self.assertEqual(msa_a, msa_b)

    def test_get_consensus(self):
        """Turkish consensus strings equal the joined Turkish tokens."""
        # The sequences were already aligned in setUp.
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        self.assertEqual(
            self.alm.get_list(language="Turkish", entry="consensus", flat=True),
            [''.join(x) for x in
             self.alm.get_list(language="Turkish", entry="tokens", flat=True)])

    def test_get_confidence(self):
        """Run ``get_confidence`` and write HTML with ``confidence=True``."""
        lex = LexStat(test_data('KSL3.qlc'))
        # Copy the 'numbers' column of the LexStat object, keyed by row.
        tmp_dict = {k: lex[k, 'numbers'] for k in lex}
        self.alm.add_entries('numbers', tmp_dict, lambda x: x)
        # Run get_confidence before the confidence-annotated export; its
        # return value is not inspected here.
        # TODO: Check and document side-effects of this.
        self.alm.get_confidence(lex.rscorer, ref='cogid')
        self.alm.output('html', filename=text_type(self.tmp_path('alm')),
                        confidence=True)

    def test_output(self):
        """Write the aligned data as 'tsv' and 'html' into the temp dir."""
        self.alm.output('tsv', filename=text_type(self.tmp_path('test')))
        self.alm.output('html', filename=text_type(self.tmp_path('test')))
Beispiel #4
0
class TestAlignments(WithTempDir):
    """Checks on an ``Alignments`` object loaded from ``KSL2.qlc``."""

    def setUp(self):
        """Prepare the base fixture, then load the alignment data."""
        WithTempDir.setUp(self)
        data_file = test_data('KSL2.qlc')
        self.alm = Alignments(data_file, loans=False, _interactive=False)

    def test_ipa2tokens(self):
        """Stored token columns must match a fresh ``ipa2tokens`` run."""
        for idx in self.alm:
            source = self.alm[idx, 'ipa']
            # Stored segmentations are space-separated strings.
            expected_merged = self.alm[idx, 'tokensa'].split(' ')
            expected_split = self.alm[idx, 'tokensb'].split(' ')

            actual_merged = lp.ipa2tokens(source, merge_vowels=True)
            actual_split = lp.ipa2tokens(source, merge_vowels=False)
            assert expected_merged == actual_merged
            assert expected_split == actual_split

    def test_align(self):
        """``SCA`` and ``Multiple`` must agree for every 'cogid' entry."""
        # Align everything with the default parameters first.
        self.alm.align()

        for entry in self.alm.msa['cogid'].values():
            # Plain alignment: SCA on the entry vs. progressive Multiple.
            reference = lp.SCA(entry)
            candidate = lp.Multiple(entry['seqs'])
            candidate.prog_align()
            assert reference == candidate

            # Library alignment: stored 'tokensb' vs. joined sequences
            # re-tokenized with merge_vowels switched off.
            stored_tokens = [self.alm[i, 'tokensb'] for i in entry['ID']]
            reference = lp.Multiple(stored_tokens)
            joined_seqs = [''.join(seq) for seq in entry['seqs']]
            candidate = lp.Multiple(joined_seqs, merge_vowels=False)
            reference.lib_align()
            candidate.lib_align()
            assert reference == candidate

    def test_get_consensus(self):
        """Turkish consensus strings must equal the joined Turkish tokens."""
        # Align everything with the default parameters first.
        self.alm.align()

        # One leaf per line of the data file, named after the second
        # tab-separated field of that line.
        leaves = [
            TreeNode(Name=row.split('\t')[1])
            for row in read_config_file(test_data('KSL2.qlc'))
        ]
        tree = TreeNode(Name='root', Children=leaves)

        self.alm.get_consensus(consensus="consensus", tree=tree)
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # Compare the Turkish consensus column with the joined tokens.
        consensus = self.alm.get_list(
            language="Turkish", entry="consensus", flat=True)
        tokens = self.alm.get_list(
            language="Turkish", entry="tokens", flat=True)
        assert consensus == [''.join(tok) for tok in tokens]

    def test_output(self):
        """Export the aligned data as 'qlc' and as 'html'."""
        self.alm.align()
        for fmt in ('qlc', 'html'):
            self.alm.output(fmt, filename=text_type(self.tmp_path('test')))