class TestAlignments(WithTempDir):
    """Tests for an ``Alignments`` object built from ``KSL2.qlc``."""

    def setUp(self):
        """Run the base-class setup and load a fresh ``Alignments``."""
        WithTempDir.setUp(self)
        self.alm = Alignments(
            test_data('KSL2.qlc'),
            loans=False,
            _interactive=False,
        )

    def test_ipa2tokens(self):
        """Tokenising ``ipa`` must reproduce ``tokensa`` and ``tokensb``."""
        for idx in self.alm:
            ipa_string = self.alm[idx, 'ipa']
            stored_merged = self.alm[idx, 'tokensa'].split(' ')
            stored_unmerged = self.alm[idx, 'tokensb'].split(' ')

            # 'tokensa' is compared against merge_vowels=True,
            # 'tokensb' against merge_vowels=False.
            fresh_merged = lp.ipa2tokens(
                ipa_string, merge_vowels=True, merge_geminates=False)
            fresh_unmerged = lp.ipa2tokens(
                ipa_string, merge_vowels=False, merge_geminates=False)

            assert stored_merged == fresh_merged
            assert stored_unmerged == fresh_unmerged

    def test_align(self):
        """Compare ``align()`` results with manually built alignments."""
        # Align everything with the default parameters first.
        self.alm.align()

        for _, msa in self.alm.msa['cogid'].items():
            # Plain comparison: SCA wrapper vs. progressive alignment.
            from_sca = lp.SCA(msa)
            from_seqs = lp.Multiple(msa['seqs'])
            from_seqs.prog_align()
            assert from_sca == from_seqs

            # Same comparison with vowel merging switched off.
            from_tokens = lp.Multiple(
                [self.alm[idx, 'tokensb'] for idx in msa['ID']])
            from_strings = lp.Multiple(
                [''.join(seq) for seq in msa['seqs']],
                merge_vowels=False,
            )
            from_tokens.lib_align()
            from_strings.lib_align()
            assert from_tokens == from_strings

    def test_get_consensus(self):
        """Run ``get_consensus`` in three modes and check Turkish entries."""
        self.alm.align()

        # Build a flat tree: one leaf per line of the data file, named
        # after the second tab-separated field of that line.
        leaves = [
            TreeNode(Name=line.split('\t')[1])
            for line in read_config_file(test_data('KSL2.qlc'))
        ]
        tree = TreeNode(Name='root', Children=leaves)

        self.alm.get_consensus(consensus="consensus", tree=tree)
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # The Turkish consensus strings must equal the joined tokens.
        turkish_consensus = self.alm.get_list(
            language="Turkish", entry="consensus", flat=True)
        turkish_tokens = self.alm.get_list(
            language="Turkish", entry="tokens", flat=True)
        self.assertEqual(
            turkish_consensus,
            [''.join(tokens) for tokens in turkish_tokens],
        )

    def test_output(self):
        """Write the aligned data in ``qlc`` and ``html`` format."""
        self.alm.align()
        for fmt in ('qlc', 'html'):
            self.alm.output(fmt, filename=text_type(self.tmp_path('test')))
class TestAlignments(WithTempDir):
    """Tests for an aligned ``Alignments`` object built from ``KSL2.qlc``."""

    def setUp(self):
        """Load ``KSL2.qlc`` and align it once for every test."""
        WithTempDir.setUp(self)
        self.alm = Alignments(
            test_data('KSL2.qlc'), loans=False, _interactive=False)
        self.alm.align()

    def test_ipa2tokens(self):
        """Tokenising ``ipa`` must reproduce ``tokensa`` and ``tokensb``."""
        for key in self.alm:
            ipa = self.alm[key, 'ipa']
            tokens_a = self.alm[key, 'tokensa'].split(' ')
            tokens_b = self.alm[key, 'tokensb'].split(' ')

            new_tokens_a = lp.ipa2tokens(
                ipa, merge_vowels=True, merge_geminates=False)
            new_tokens_b = lp.ipa2tokens(
                ipa, merge_vowels=False, merge_geminates=False)

            assert tokens_a == new_tokens_a
            assert tokens_b == new_tokens_b

    def test_align(self):
        """Align on a string-valued copy of the reference column.

        Also compares the stored alignments with manually built ones.
        """
        # 'cugid' holds the reference column converted with text_type.
        self.alm.add_entries('cugid', self.alm._ref, lambda x: text_type(x))
        self.alm.add_alignments(ref="cugid")

        # Align on the new column, storing results in a second column.
        self.alm.align(ref="cugid", alignment="alignment2")
        assert (self.alm.msa["cugid"]["1"]["ID"]
                == self.alm.msa["cogid"][1]["ID"])

        for _, value in self.alm.msa['cogid'].items():
            # First compare simple alignments.
            msa_a = lp.SCA(value)
            msa_b = lp.Multiple(value['seqs'])
            msa_b.prog_align()
            assert msa_a == msa_b

            # Now compare with vowel merging switched off.
            msa_a = lp.Multiple(
                [self.alm[idx, 'tokensb'] for idx in value['ID']])
            msa_b = lp.Multiple(
                [''.join(s) for s in value['seqs']], merge_vowels=False)
            msa_a.lib_align()
            msa_b.lib_align()
            assert msa_a == msa_b

    def test_get_consensus(self):
        """Compute consensus strings and check the Turkish entries."""
        # The data is already aligned in setUp.
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # Check whether the Turkish strings are identical.
        self.assertEqual(
            self.alm.get_list(
                language="Turkish", entry="consensus", flat=True),
            [
                ''.join(x)
                for x in self.alm.get_list(
                    language="Turkish", entry="tokens", flat=True)
            ],
        )

    def test_get_confidence(self):
        """Run ``get_confidence`` with a ``LexStat`` scorer, then write HTML."""
        lex = LexStat(test_data('KSL3.qlc'))
        numbers = {k: lex[k, 'numbers'] for k in lex}
        self.alm.add_entries('numbers', numbers, lambda x: x)

        # The return value is not inspected; the call is made before
        # writing HTML with confidence=True.
        self.alm.get_confidence(lex.rscorer, ref='cogid')
        self.alm.output(
            'html',
            filename=text_type(self.tmp_path('alm')),
            confidence=True,
        )

    def test_output(self):
        """Write the aligned data in ``tsv`` and ``html`` format."""
        self.alm.output('tsv', filename=text_type(self.tmp_path('test')))
        self.alm.output('html', filename=text_type(self.tmp_path('test')))
class TestAlignments(WithTempDir):
    """Tests run against ``KSL2.qlc`` after it has been aligned."""

    def setUp(self):
        """Create the ``Alignments`` fixture and align it up front."""
        WithTempDir.setUp(self)
        self.alm = Alignments(
            test_data('KSL2.qlc'),
            loans=False,
            _interactive=False,
        )
        self.alm.align()

    def test_ipa2tokens(self):
        """Stored token columns must match freshly tokenised ``ipa``."""
        for idx in self.alm:
            ipa_string = self.alm[idx, 'ipa']
            stored_merged = self.alm[idx, 'tokensa'].split(' ')
            stored_unmerged = self.alm[idx, 'tokensb'].split(' ')

            fresh_merged = lp.ipa2tokens(
                ipa_string, merge_vowels=True, merge_geminates=False)
            fresh_unmerged = lp.ipa2tokens(
                ipa_string, merge_vowels=False, merge_geminates=False)

            assert stored_merged == fresh_merged
            assert stored_unmerged == fresh_unmerged

    def test_align(self):
        """Re-align on a text-typed reference column and compare results."""
        self.alm.add_entries('cugid', self.alm._ref, lambda x: text_type(x))
        self.alm.add_alignments(ref="cugid")

        # Align on the new reference, writing to a separate column.
        self.alm.align(ref="cugid", alignment="alignment2")
        ids_for_string_key = self.alm.msa["cugid"]["1"]["ID"]
        ids_for_int_key = self.alm.msa["cogid"][1]["ID"]
        assert ids_for_string_key == ids_for_int_key

        for _, msa in self.alm.msa['cogid'].items():
            # Simple alignments: SCA wrapper vs. progressive alignment.
            from_sca = lp.SCA(msa)
            from_seqs = lp.Multiple(msa['seqs'])
            from_seqs.prog_align()
            assert from_sca == from_seqs

            # Library alignments with vowel merging disabled.
            from_tokens = lp.Multiple(
                [self.alm[i, 'tokensb'] for i in msa['ID']])
            from_strings = lp.Multiple(
                [''.join(seq) for seq in msa['seqs']],
                merge_vowels=False,
            )
            from_tokens.lib_align()
            from_strings.lib_align()
            assert from_tokens == from_strings

    def test_get_consensus(self):
        """Consensus for Turkish must equal the joined token lists."""
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        turkish_consensus = self.alm.get_list(
            language="Turkish", entry="consensus", flat=True)
        turkish_tokens = self.alm.get_list(
            language="Turkish", entry="tokens", flat=True)
        self.assertEqual(
            turkish_consensus,
            [''.join(tokens) for tokens in turkish_tokens],
        )

    def test_get_confidence(self):
        """Call ``get_confidence`` and write HTML with ``confidence=True``."""
        lex = LexStat(test_data('KSL3.qlc'))
        numbers_by_key = {k: lex[k, 'numbers'] for k in lex}
        self.alm.add_entries('numbers', numbers_by_key, lambda x: x)

        # Run get_confidence to populate the output variable; its return
        # value is deliberately discarded.
        # TODO: Check and document side-effects of this.
        self.alm.get_confidence(lex.rscorer, ref='cogid')

        target = text_type(self.tmp_path('alm'))
        self.alm.output('html', filename=target, confidence=True)

    def test_output(self):
        """Write the aligned data as ``tsv`` and then as ``html``."""
        for fmt in ('tsv', 'html'):
            self.alm.output(fmt, filename=text_type(self.tmp_path('test')))
class TestAlignments(WithTempDir):
    """Tests for an ``Alignments`` object built from ``KSL2.qlc``."""

    def setUp(self):
        """Run the base-class setup and load an unaligned ``Alignments``."""
        WithTempDir.setUp(self)
        self.alm = Alignments(
            test_data('KSL2.qlc'),
            loans=False,
            _interactive=False,
        )

    def test_ipa2tokens(self):
        """Tokenising ``ipa`` must reproduce ``tokensa`` and ``tokensb``."""
        for idx in self.alm:
            ipa_string = self.alm[idx, 'ipa']
            stored_merged = self.alm[idx, 'tokensa'].split(' ')
            stored_unmerged = self.alm[idx, 'tokensb'].split(' ')

            fresh_merged = lp.ipa2tokens(ipa_string, merge_vowels=True)
            fresh_unmerged = lp.ipa2tokens(ipa_string, merge_vowels=False)

            assert stored_merged == fresh_merged
            assert stored_unmerged == fresh_unmerged

    def test_align(self):
        """Compare ``align()`` results with manually built alignments."""
        # Align everything with the default parameters first.
        self.alm.align()

        for _, msa in self.alm.msa['cogid'].items():
            # Plain comparison: SCA wrapper vs. progressive alignment.
            from_sca = lp.SCA(msa)
            from_seqs = lp.Multiple(msa['seqs'])
            from_seqs.prog_align()
            assert from_sca == from_seqs

            # Same comparison with vowel merging switched off.
            from_tokens = lp.Multiple(
                [self.alm[i, 'tokensb'] for i in msa['ID']])
            from_strings = lp.Multiple(
                [''.join(seq) for seq in msa['seqs']],
                merge_vowels=False,
            )
            from_tokens.lib_align()
            from_strings.lib_align()
            assert from_tokens == from_strings

    def test_get_consensus(self):
        """Run ``get_consensus`` in three modes and check Turkish entries."""
        self.alm.align()

        # One leaf per line of the data file, named after the second
        # tab-separated field of that line.
        leaves = [
            TreeNode(Name=line.split('\t')[1])
            for line in read_config_file(test_data('KSL2.qlc'))
        ]
        tree = TreeNode(Name='root', Children=leaves)

        self.alm.get_consensus(consensus="consensus", tree=tree)
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # The Turkish consensus strings must equal the joined tokens.
        turkish_consensus = self.alm.get_list(
            language="Turkish", entry="consensus", flat=True)
        turkish_tokens = self.alm.get_list(
            language="Turkish", entry="tokens", flat=True)
        assert turkish_consensus == [
            ''.join(tokens) for tokens in turkish_tokens
        ]

    def test_output(self):
        """Write the aligned data in ``qlc`` and ``html`` format."""
        self.alm.align()
        for fmt in ('qlc', 'html'):
            self.alm.output(fmt, filename=text_type(self.tmp_path('test')))