def test_wl2qlc(self):
    """Export wordlists through ``wl2qlc`` and check the written stamp.

    The plain wordlist is written first and the resulting ``test.qlc`` file
    must end with the stamp. Afterwards an aligned wordlist is exported
    several times (with and without a ``formatter``) to the same file.
    """
    stamp = 'test-stamp'
    target = self.tmp_path('test')
    wl2qlc(
        self.wordlist.header,
        self.wordlist._data,
        filename=target.as_posix(),
        stamp=stamp)

    # The export was requested as 'test'; the result is read from 'test.qlc'.
    target = self.tmp_path('test.qlc')
    with target.open(encoding='utf8') as handle:
        content = handle.read()
    self.assertTrue(content.endswith(stamp))

    # Load a wordlist with alignments and output it as string with msapairs.
    aligned = Alignments(test_data('good_file.tsv'), ref='cogid')
    aligned.align(ref="cogid")
    wl2qlc(
        aligned.header,
        aligned._data,
        meta=aligned._meta,
        filename=target.as_posix(),
        stamp='stampo',
        ignore=[])
    aligned.get_consensus(ref="cogid")

    # Same export twice, once per formatter; the header is passed as
    # upper-cased names ordered by the values stored in ``aligned.header``.
    for formatter in ("doculect,concept", "doculect"):
        columns = [
            name.upper()
            for name in sorted(
                aligned.header, key=lambda name: aligned.header[name])
        ]
        wl2qlc(
            columns,
            aligned._data,
            meta=aligned._meta,
            filename=target.as_posix(),
            stamp='stampo',
            ignore=[],
            formatter=formatter)
def test_wl2qlc(self):
    """Check ``wl2qlc`` output for a plain and for an aligned wordlist."""
    stamp = 'test-stamp'
    out = self.tmp_path('test')
    wl2qlc(self.wordlist.header, self.wordlist._data,
           filename=out.as_posix(), stamp=stamp)

    out = self.tmp_path('test.qlc')
    with out.open(encoding='utf8') as stream:
        written = stream.read()
    # The stamp has to be the very last thing in the written file.
    self.assertTrue(written.endswith(stamp))

    # Load a wordlist with alignments and output it as string with msapairs.
    alms = Alignments(test_data('good_file.tsv'), ref='cogid')
    alms.align(ref="cogid")

    def upper_header():
        """Return upper-cased header names ordered by their header value."""
        ordered = sorted(alms.header, key=lambda name: alms.header[name])
        return [name.upper() for name in ordered]

    def export(header, **extra):
        """Write the aligned data to ``out`` using the given header."""
        wl2qlc(header, alms._data, meta=alms._meta,
               filename=out.as_posix(), stamp='stampo', ignore=[], **extra)

    export(alms.header)
    alms.get_consensus(ref="cogid")
    export(upper_header(), formatter="doculect,concept")
    export(upper_header(), formatter="doculect")
def test_wl2qlc(tmppath, test_data, wordlist):
    """Export wordlists through ``wl2qlc`` and verify the trailing stamp.

    ``tmppath`` and ``test_data`` are used as path-like directories (joined
    with ``/``); ``wordlist`` supplies ``header`` and ``_data`` for the first
    export.
    """
    stamp = 'test-stamp'
    wl2qlc(
        wordlist.header,
        wordlist._data,
        filename=str(tmppath / 'test'),
        stamp=stamp)

    # The export was requested as 'test'; the result is read from 'test.qlc'.
    qlc_file = tmppath / 'test.qlc'
    written = qlc_file.read_text(encoding='utf8')
    assert written.endswith(stamp)

    # Load a wordlist with alignments and output it as string with msapairs.
    aligned = Alignments(str(test_data / 'good_file.tsv'), ref='cogid')
    aligned.align(ref="cogid")
    wl2qlc(
        aligned.header,
        aligned._data,
        meta=aligned._meta,
        filename=str(qlc_file),
        stamp='stampo',
        ignore=[])
    aligned.get_consensus(ref="cogid")

    # Repeat the export once per formatter with an upper-cased header that is
    # ordered by the values stored in ``aligned.header``.
    for formatter in ("doculect,concept", "doculect"):
        columns = [
            name.upper()
            for name in sorted(
                aligned.header, key=lambda name: aligned.header[name])
        ]
        wl2qlc(
            columns,
            aligned._data,
            meta=aligned._meta,
            filename=qlc_file.as_posix(),
            stamp='stampo',
            ignore=[],
            formatter=formatter)
def setUp(self):
    """Set up the temp dir and store an aligned ``Alignments`` as ``self.alm``."""
    WithTempDir.setUp(self)
    source = test_data('KSL2.qlc')
    self.alm = Alignments(source, loans=False, _interactive=False)
    # Align once here so the individual tests can rely on alignment data.
    self.alm.align()
class TestAlignments(WithTempDir):
    """Tests for an ``Alignments`` object loaded from ``KSL2.qlc``."""

    def setUp(self):
        """Create the temp dir and an already aligned ``self.alm``."""
        WithTempDir.setUp(self)
        self.alm = Alignments(
            test_data('KSL2.qlc'), loans=False, _interactive=False)
        self.alm.align()

    def test_ipa2tokens(self):
        """Stored ``tokensa``/``tokensb`` must match fresh ``ipa2tokens`` output."""
        # iterate over the keys
        for key in self.alm:
            ipa = self.alm[key, 'ipa']
            tokens_a = self.alm[key, 'tokensa'].split(' ')
            tokens_b = self.alm[key, 'tokensb'].split(' ')

            new_tokens_a = lp.ipa2tokens(
                ipa, merge_vowels=True, merge_geminates=False)
            new_tokens_b = lp.ipa2tokens(
                ipa, merge_vowels=False, merge_geminates=False)
            assert tokens_a == new_tokens_a
            assert tokens_b == new_tokens_b

    def test_align(self):
        """Align on a string copy of the reference and compare MSA variants."""
        # 'cugid' is the reference column converted with ``text_type``.
        self.alm.add_entries('cugid', self.alm._ref, lambda x: text_type(x))
        self.alm.add_alignments(ref="cugid")

        # align all sequences using standard params
        self.alm.align(ref="cugid", alignment="alignment2")
        # The "cugid" msa is keyed by "1" (string), the "cogid" msa by 1.
        assert (self.alm.msa["cugid"]["1"]["ID"] ==
                self.alm.msa["cogid"][1]["ID"])

        # iterate and align using the multiple function; only the values
        # are needed, so the keys are not unpacked
        for value in self.alm.msa['cogid'].values():
            # first compare simple alignments
            msa_a = lp.SCA(value)
            msa_b = lp.Multiple(value['seqs'])
            msa_b.prog_align()
            assert msa_a == msa_b

            # now compare with different flag
            msa_a = lp.Multiple(
                [self.alm[idx, 'tokensb'] for idx in value['ID']])
            msa_b = lp.Multiple(
                [''.join(s) for s in value['seqs']], merge_vowels=False)
            msa_a.lib_align()
            msa_b.lib_align()
            assert msa_a == msa_b

    def test_get_consensus(self):
        """Turkish consensus strings must equal the joined Turkish tokens."""
        # compute the consensus with and without the ``classes`` flag
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        self.assertEqual(
            self.alm.get_list(
                language="Turkish", entry="consensus", flat=True),
            [
                ''.join(x) for x in self.alm.get_list(
                    language="Turkish", entry="tokens", flat=True)
            ])

    def test_get_confidence(self):
        """Run ``get_confidence`` with a ``LexStat`` scorer, then write HTML."""
        lex = LexStat(test_data('KSL3.qlc'))
        numbers = {k: lex[k, 'numbers'] for k in lex}
        self.alm.add_entries('numbers', numbers, lambda x: x)
        # The return value is not inspected by this test.
        # NOTE(review): the html output below presumably relies on state set
        # by this call -- confirm against get_confidence.
        self.alm.get_confidence(lex.rscorer, ref='cogid')
        self.alm.output(
            'html', filename=text_type(self.tmp_path('alm')), confidence=True)

    def test_output(self):
        """Write the alignments as 'tsv' and as 'html' into the temp dir."""
        self.alm.output('tsv', filename=text_type(self.tmp_path('test')))
        self.alm.output('html', filename=text_type(self.tmp_path('test')))
def setUp(self):
    """Set up the temp dir and load ``KSL2.qlc`` into ``self.alm``."""
    WithTempDir.setUp(self)
    source = test_data('KSL2.qlc')
    # No alignment is computed here; ``self.alm`` is only constructed.
    self.alm = Alignments(source, loans=False, _interactive=False)
class TestAlignments(WithTempDir):
    """Tests for an ``Alignments`` object loaded from ``KSL2.qlc``."""

    def setUp(self):
        """Create the temp dir and load ``self.alm`` (not yet aligned)."""
        WithTempDir.setUp(self)
        source = test_data('KSL2.qlc')
        self.alm = Alignments(source, loans=False, _interactive=False)

    def test_ipa2tokens(self):
        """Stored ``tokensa``/``tokensb`` must match fresh ``ipa2tokens`` output."""
        # iterate over the keys
        for key in self.alm:
            ipa = self.alm[key, 'ipa']
            stored_merged = self.alm[key, 'tokensa'].split(' ')
            stored_split = self.alm[key, 'tokensb'].split(' ')

            fresh_merged = lp.ipa2tokens(ipa, merge_vowels=True)
            fresh_split = lp.ipa2tokens(ipa, merge_vowels=False)
            assert stored_merged == fresh_merged
            assert stored_split == fresh_split

    def test_align(self):
        """Compare the computed alignments against ``SCA``/``Multiple``."""
        # align all sequences using standard params
        self.alm.align()

        # iterate and align using the multiple function
        for _, msa in self.alm.msa['cogid'].items():
            # first compare simple alignments
            simple = lp.SCA(msa)
            progressive = lp.Multiple(msa['seqs'])
            progressive.prog_align()
            assert simple == progressive

            # now compare with different flag
            token_rows = [self.alm[idx, 'tokensb'] for idx in msa['ID']]
            joined_rows = [''.join(seq) for seq in msa['seqs']]
            from_tokens = lp.Multiple(token_rows)
            from_strings = lp.Multiple(joined_rows, merge_vowels=False)
            from_tokens.lib_align()
            from_strings.lib_align()
            assert from_tokens == from_strings

    def test_get_consensus(self):
        """Turkish consensus strings must equal the joined Turkish tokens."""
        # align all sequences using standard params
        self.alm.align()

        # Flat tree: one child per config line, named by its second
        # tab-separated field.
        leaves = [
            TreeNode(Name=line.split('\t')[1])
            for line in read_config_file(test_data('KSL2.qlc'))
        ]
        tree = TreeNode(Name='root', Children=leaves)

        self.alm.get_consensus(consensus="consensus", tree=tree)
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        consensus = self.alm.get_list(
            language="Turkish", entry="consensus", flat=True)
        joined_tokens = [
            ''.join(tokens)
            for tokens in self.alm.get_list(
                language="Turkish", entry="tokens", flat=True)
        ]
        assert consensus == joined_tokens

    def test_output(self):
        """Align, then write 'qlc' and 'html' output into the temp dir."""
        self.alm.align()
        for fileformat in ('qlc', 'html'):
            self.alm.output(
                fileformat, filename=text_type(self.tmp_path('test')))
class TestAlignments(WithTempDir):
    """Tests for an ``Alignments`` object loaded from ``KSL2.qlc``."""

    def setUp(self):
        """Create the temp dir and an already aligned ``self.alm``."""
        WithTempDir.setUp(self)
        source = test_data('KSL2.qlc')
        self.alm = Alignments(source, loans=False, _interactive=False)
        self.alm.align()

    def test_ipa2tokens(self):
        """Stored ``tokensa``/``tokensb`` must match fresh ``ipa2tokens`` output."""
        # iterate over the keys
        for key in self.alm:
            ipa = self.alm[key, 'ipa']
            # Check the merged-vowel and the unmerged variant in turn.
            for column, merge_vowels in (('tokensa', True), ('tokensb', False)):
                stored = self.alm[key, column].split(' ')
                fresh = lp.ipa2tokens(
                    ipa, merge_vowels=merge_vowels, merge_geminates=False)
                assert stored == fresh

    def test_align(self):
        """Align on a string copy of the reference and compare MSA variants."""
        self.alm.add_entries('cugid', self.alm._ref, lambda x: text_type(x))
        self.alm.add_alignments(ref="cugid")

        # align all sequences using standard params
        self.alm.align(ref="cugid", alignment="alignment2")
        string_keyed = self.alm.msa["cugid"]["1"]["ID"]
        int_keyed = self.alm.msa["cogid"][1]["ID"]
        assert string_keyed == int_keyed

        # iterate and align using the multiple function
        for _, msa in self.alm.msa['cogid'].items():
            # first compare simple alignments
            simple = lp.SCA(msa)
            progressive = lp.Multiple(msa['seqs'])
            progressive.prog_align()
            assert simple == progressive

            # now compare with different flag
            token_rows = [self.alm[idx, 'tokensb'] for idx in msa['ID']]
            joined_rows = [''.join(seq) for seq in msa['seqs']]
            from_tokens = lp.Multiple(token_rows)
            from_strings = lp.Multiple(joined_rows, merge_vowels=False)
            from_tokens.lib_align()
            from_strings.lib_align()
            assert from_tokens == from_strings

    def test_get_consensus(self):
        """Turkish consensus strings must equal the joined Turkish tokens."""
        # compute the consensus with and without the ``classes`` flag
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        consensus = self.alm.get_list(
            language="Turkish", entry="consensus", flat=True)
        joined_tokens = [
            ''.join(tokens)
            for tokens in self.alm.get_list(
                language="Turkish", entry="tokens", flat=True)
        ]
        self.assertEqual(consensus, joined_tokens)

    def test_get_confidence(self):
        """Run ``get_confidence`` with a ``LexStat`` scorer, then write HTML."""
        lex = LexStat(test_data('KSL3.qlc'))
        numbers = {k: lex[k, 'numbers'] for k in lex}
        self.alm.add_entries('numbers', numbers, lambda x: x)
        # Run get_confidence to populate the output variable.
        # TODO: Check and document side-effects of this.
        _ = self.alm.get_confidence(lex.rscorer, ref='cogid')
        html_target = text_type(self.tmp_path('alm'))
        self.alm.output('html', filename=html_target, confidence=True)

    def test_output(self):
        """Write the alignments as 'tsv' and as 'html' into the temp dir."""
        for fileformat in ('tsv', 'html'):
            self.alm.output(
                fileformat, filename=text_type(self.tmp_path('test')))
def alm(test_data):
    """Return an aligned ``Alignments`` object built from ``KSL2.qlc``.

    ``test_data`` is used as a path-like directory (joined with ``/``).
    """
    source = str(test_data / 'KSL2.qlc')
    alignments = Alignments(source, loans=False, _interactive=False)
    alignments.align()
    return alignments
class TestAlignments(WithTempDir):
    """Tests for an ``Alignments`` object loaded from ``KSL2.qlc``."""

    def setUp(self):
        """Create the temp dir and load ``self.alm`` (not yet aligned)."""
        WithTempDir.setUp(self)
        self.alm = Alignments(
            test_data('KSL2.qlc'),
            loans=False,
            _interactive=False)

    def test_ipa2tokens(self):
        """Stored ``tokensa``/``tokensb`` must match fresh ``ipa2tokens`` output."""
        # iterate over the keys
        for key in self.alm:
            ipa = self.alm[key, 'ipa']
            # Check the merged-vowel and the unmerged variant in turn.
            for column, merge_vowels in (('tokensa', True), ('tokensb', False)):
                stored = self.alm[key, column].split(' ')
                fresh = lp.ipa2tokens(ipa, merge_vowels=merge_vowels)
                assert stored == fresh

    def test_align(self):
        """Compare the computed alignments against ``SCA``/``Multiple``."""
        # align all sequences using standard params
        self.alm.align()

        # iterate and align using the multiple function
        for _, entry in self.alm.msa['cogid'].items():
            sequences = entry['seqs']

            # first compare simple alignments
            via_sca = lp.SCA(entry)
            via_multiple = lp.Multiple(sequences)
            via_multiple.prog_align()
            assert via_sca == via_multiple

            # now compare with different flag
            via_tokens = lp.Multiple(
                [self.alm[idx, 'tokensb'] for idx in entry['ID']])
            via_strings = lp.Multiple(
                [''.join(seq) for seq in sequences], merge_vowels=False)
            via_tokens.lib_align()
            via_strings.lib_align()
            assert via_tokens == via_strings

    def test_get_consensus(self):
        """Turkish consensus strings must equal the joined Turkish tokens."""
        # align all sequences using standard params
        self.alm.align()

        # Flat tree: the root's children are named by the second
        # tab-separated field of every config line.
        config_lines = read_config_file(test_data('KSL2.qlc'))
        children = [
            TreeNode(Name=line.split('\t')[1]) for line in config_lines
        ]
        tree = TreeNode(Name='root', Children=children)

        self.alm.get_consensus(consensus="consensus", tree=tree)
        self.alm.get_consensus(consensus="consensus", classes=True)
        self.alm.get_consensus(consensus="consensus")

        # check whether Turkish strings are identical
        turkish_consensus = self.alm.get_list(
            language="Turkish", entry="consensus", flat=True)
        turkish_tokens = self.alm.get_list(
            language="Turkish", entry="tokens", flat=True)
        assert turkish_consensus == [''.join(x) for x in turkish_tokens]

    def test_output(self):
        """Align, then write 'qlc' and 'html' output into the temp dir."""
        self.alm.align()
        qlc_target = text_type(self.tmp_path('test'))
        self.alm.output('qlc', filename=qlc_target)
        html_target = text_type(self.tmp_path('test'))
        self.alm.output('html', filename=html_target)