def test_from_file_GC(self):
    """Parse a record containing a ``#=GC`` line.

    The first alignment yielded by ``from_file`` must equal one built
    from ``self.seqs`` with ``self.GC`` in the last positional slot and
    empty dicts everywhere else.
    """
    handle = StringIO(
        "# STOCKHOLM 1.0\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "#=GC SS_cons (((....)))\n"
        "//"
    )
    parsed = next(StockholmAlignment.from_file(handle, DNA))
    expected = StockholmAlignment(self.seqs, {}, {}, {}, self.GC)
    self.assertEqual(parsed, expected)
def test_from_file_GS(self):
    """Parse a record containing ``#=GS`` lines.

    The input lists seq2's ``#=GS`` line before seq1's; the parsed
    alignment must still equal one built with ``self.GS`` in the third
    positional slot.
    """
    handle = StringIO(
        "# STOCKHOLM 1.0\n"
        "#=GS seq2 AC 222\n"
        "#=GS seq1 AC 111\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "//"
    )
    parsed = next(StockholmAlignment.from_file(handle, DNA))
    expected = StockholmAlignment(self.seqs, {}, self.GS, {}, {})
    self.assertEqual(parsed, expected)
def test_str_gf(self):
    """Check ``str()`` of an alignment that carries only GF data."""
    expected = (
        "# STOCKHOLM 1.0\n"
        "#=GF AC RF00360\n"
        "#=GF BM cmbuild -F CM SEED\n"
        "#=GF BM cmsearch -Z 274931 -E 1000000\n"
        "#=GF SQ 9\n"
        "#=GF RN [1]\n"
        "#=GF RM 11469857\n"
        "#=GF RT TITLE1\n"
        "#=GF RA Auth1;\n"
        "#=GF RL J Mol Biol\n"
        "#=GF RN [2]\n"
        "#=GF RM 12007400\n"
        "#=GF RT TITLE2\n"
        "#=GF RA Auth2;\n"
        "#=GF RL Cell\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "//"
    )
    alignment = StockholmAlignment(self.seqs, gc=None, gf=self.GF,
                                   gs=None, gr=None)
    self.assertEqual(str(alignment), expected)
def test_str(self):
    """Check ``str()`` of an alignment carrying GF, GS, GR and GC data."""
    expected = (
        "# STOCKHOLM 1.0\n"
        "#=GF AC RF00360\n"
        "#=GF BM cmbuild -F CM SEED\n"
        "#=GF BM cmsearch -Z 274931 -E 1000000\n"
        "#=GF SQ 9\n"
        "#=GF RN [1]\n"
        "#=GF RM 11469857\n"
        "#=GF RT TITLE1\n"
        "#=GF RA Auth1;\n"
        "#=GF RL J Mol Biol\n"
        "#=GF RN [2]\n"
        "#=GF RM 12007400\n"
        "#=GF RT TITLE2\n"
        "#=GF RA Auth2;\n"
        "#=GF RL Cell\n"
        "#=GS seq1 AC 111\n"
        "#=GS seq2 AC 222\n"
        "seq1 ACC-G-GGTA\n"
        "#=GR seq1 SS 1110101111\n"
        "seq2 TCC-G-GGCA\n"
        "#=GR seq2 SS 0110101110\n"
        "#=GC SS_cons (((....)))\n"
        "//"
    )
    alignment = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                   gs=self.GS, gr=self.GR)
    self.assertEqual(str(alignment), expected)
def test_to_file(self):
    """Write a fully annotated alignment to a temp file and read it back.

    The text read back from the file must match the expected Stockholm
    record exactly.
    """
    expected = (
        "# STOCKHOLM 1.0\n"
        "#=GF AC RF00360\n"
        "#=GF BM cmbuild -F CM SEED\n"
        "#=GF BM cmsearch -Z 274931 -E 1000000\n"
        "#=GF SQ 9\n"
        "#=GF RN [1]\n"
        "#=GF RM 11469857\n"
        "#=GF RT TITLE1\n"
        "#=GF RA Auth1;\n"
        "#=GF RL J Mol Biol\n"
        "#=GF RN [2]\n"
        "#=GF RM 12007400\n"
        "#=GF RT TITLE2\n"
        "#=GF RA Auth2;\n"
        "#=GF RL Cell\n"
        "#=GS seq1 AC 111\n"
        "#=GS seq2 AC 222\n"
        "seq1 ACC-G-GGTA\n"
        "#=GR seq1 SS 1110101111\n"
        "seq2 TCC-G-GGCA\n"
        "#=GR seq2 SS 0110101110\n"
        "#=GC SS_cons (((....)))\n"
        "//"
    )
    alignment = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                   gs=self.GS, gr=self.GR)
    with tempfile.NamedTemporaryFile('r+') as scratch:
        alignment.to_file(scratch)
        # Flush, then rewind, so the read starts at the beginning of
        # what was just written through the same handle.
        scratch.flush()
        scratch.seek(0)
        written = scratch.read()
    self.assertEqual(written, expected)
def test_from_file_GR(self):
    """Parse a record containing ``#=GR`` lines.

    The input splits each sequence and its ``#=GR`` line across two
    chunks separated by a blank line; the parsed alignment must equal
    one built from ``self.seqs`` with ``self.GR`` in the fourth
    positional slot.
    """
    handle = StringIO(
        "# STOCKHOLM 1.0\n"
        "seq1 ACC-G\n"
        "#=GR seq1 SS 11101\n"
        "seq2 TCC-G\n"
        "#=GR seq2 SS 01101\n"
        "\n"
        "seq1 -GGTA\n"
        "#=GR seq1 SS 01111\n"
        "seq2 -GGCA\n"
        "#=GR seq2 SS 01110\n"
        "//"
    )
    parsed = next(StockholmAlignment.from_file(handle, DNA))
    expected = StockholmAlignment(self.seqs, {}, {}, self.GR, {})
    self.assertEqual(parsed, expected)
def test_from_file_multi(self):
    """Parse a handle that holds two Stockholm records back to back.

    The first record carries only ``#=GS`` lines and the second only
    ``#=GR`` lines. Each alignment yielded by ``from_file`` must equal
    the matching hand-built alignment, and exactly two alignments must
    be yielded.
    """
    sto = StringIO(
        "# STOCKHOLM 1.0\n"
        "#=GS seq2 AC 222\n"
        "#=GS seq1 AC 111\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "//\n"
        "# STOCKHOLM 1.0\n"
        "seq1 ACC-G-GGTA\n"
        "#=GR seq1 SS 1110101111\n"
        "seq2 TCC-G-GGCA\n"
        "#=GR seq2 SS 0110101110\n"
        "//"
    )
    expected = [
        StockholmAlignment(self.seqs, {}, self.GS, {}, {}),
        StockholmAlignment(self.seqs, {}, {}, self.GR, {}),
    ]
    count = 0
    for count, obs in enumerate(StockholmAlignment.from_file(sto, DNA), 1):
        if count > len(expected):
            raise AssertionError("More than 2 sto alignments parsed!")
        self.assertEqual(obs, expected[count - 1])
    # Without this check the loop passes vacuously when the parser
    # yields fewer records than expected (including none at all).
    self.assertEqual(count, len(expected))
def test_from_file_alignment(self):
    """Parse a basic sto file whose alignment is interleaved.

    The input carries no markup lines; the result must equal an
    alignment built from ``self.seqs`` alone.
    """
    handle = StringIO(
        "# STOCKHOLM 1.0\n"
        "seq1 ACC-G\n"
        "seq2 TCC-G\n"
        "\n"
        "seq1 -GGTA\n"
        "seq2 -GGCA\n"
        "//"
    )
    parsed = next(StockholmAlignment.from_file(handle, DNA))
    expected = StockholmAlignment(self.seqs)
    self.assertEqual(parsed, expected)
def test_str_gr(self):
    """Check ``str()`` of an alignment that carries only GR data."""
    # NOTE(review): this file defines test_str_gr a second time further
    # down with the same assertions; if both sit in one class, the later
    # definition replaces this one.
    expected = (
        "# STOCKHOLM 1.0\n"
        "seq1 ACC-G-GGTA\n"
        "#=GR seq1 SS 1110101111\n"
        "seq2 TCC-G-GGCA\n"
        "#=GR seq2 SS 0110101110\n"
        "//"
    )
    alignment = StockholmAlignment(self.seqs, gc=None, gf=None, gs=None,
                                   gr=self.GR)
    self.assertEqual(str(alignment), expected)
def test_str_gc(self):
    """Check ``str()`` of an alignment that carries only GC data."""
    # NOTE(review): this file defines test_str_gc a second time further
    # down with the same assertions; if both sit in one class, the later
    # definition replaces this one.
    expected = (
        "# STOCKHOLM 1.0\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "#=GC SS_cons (((....)))\n"
        "//"
    )
    alignment = StockholmAlignment(self.seqs, gc=self.GC, gf=None,
                                   gs=None, gr=None)
    self.assertEqual(str(alignment), expected)
def test_str_gc(self):
    """Render a GC-only alignment and compare with the expected text."""
    # NOTE(review): an earlier test_str_gc with the same assertions
    # exists in this file; if both sit in one class, this definition
    # replaces the earlier one.
    rendered = str(StockholmAlignment(self.seqs, gc=self.GC, gf=None,
                                      gs=None, gr=None))
    wanted = (
        "# STOCKHOLM 1.0\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "#=GC SS_cons (((....)))\n"
        "//"
    )
    self.assertEqual(rendered, wanted)
def test_str_gr(self):
    """Render a GR-only alignment and compare with the expected text."""
    # NOTE(review): an earlier test_str_gr with the same assertions
    # exists in this file; if both sit in one class, this definition
    # replaces the earlier one.
    rendered = str(StockholmAlignment(self.seqs, gc=None, gf=None,
                                      gs=None, gr=self.GR))
    wanted = (
        "# STOCKHOLM 1.0\n"
        "seq1 ACC-G-GGTA\n"
        "#=GR seq1 SS 1110101111\n"
        "seq2 TCC-G-GGCA\n"
        "#=GR seq2 SS 0110101110\n"
        "//"
    )
    self.assertEqual(rendered, wanted)
def test_str_trees(self):
    """Check ``str()`` of an alignment whose GF data holds trees.

    The GF mapping supplies ``NH`` and ``TN`` lists; the expected text
    has each ``TN`` line immediately followed by its ``NH`` line.
    """
    # Build from a list of pairs rather than a dict literal so the key
    # order is fixed by the OrderedDict itself and does not depend on
    # the interpreter's dict ordering.
    GF = OrderedDict([
        ("NH", ["IMATREE", "IMATREETOO"]),
        ("TN", ["Tree2", "Tree1"]),
    ])
    st = StockholmAlignment(self.seqs, gc=None, gf=GF, gs=None, gr=None)
    obs = str(st)
    exp = (
        "# STOCKHOLM 1.0\n"
        "#=GF TN Tree2\n"
        "#=GF NH IMATREE\n"
        "#=GF TN Tree1\n"
        "#=GF NH IMATREETOO\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "//"
    )
    self.assertEqual(obs, exp)
def test_str_gs(self):
    """Check ``str()`` of an alignment that carries only GS data."""
    # NOTE(review): this file defines test_str_gs a second time further
    # down with the same assertions; if both sit in one class, the later
    # definition replaces this one.
    expected = (
        "# STOCKHOLM 1.0\n"
        "#=GS seq1 AC 111\n"
        "#=GS seq2 AC 222\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "//"
    )
    alignment = StockholmAlignment(self.seqs, gc=None, gf=None,
                                   gs=self.GS, gr=None)
    self.assertEqual(str(alignment), expected)
def test_str_gs(self):
    """Render a GS-only alignment and compare with the expected text."""
    # NOTE(review): an earlier test_str_gs with the same assertions
    # exists in this file; if both sit in one class, this definition
    # replaces the earlier one.
    rendered = str(StockholmAlignment(self.seqs, gc=None, gf=None,
                                      gs=self.GS, gr=None))
    wanted = (
        "# STOCKHOLM 1.0\n"
        "#=GS seq1 AC 111\n"
        "#=GS seq2 AC 222\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "//"
    )
    self.assertEqual(rendered, wanted)
def test_from_file_GF(self):
    """Parse a record containing ``#=GF`` lines.

    The input lists the reference lines before the AC/BM/SQ lines; the
    parsed alignment must equal one built with ``self.GF`` (minus its
    ``RN`` entry) in the second positional slot.
    """
    # Drop the RN entry from the expected GF data so the comparison
    # checks that it gets auto-added.
    del self.GF["RN"]
    handle = StringIO(
        "# STOCKHOLM 1.0\n"
        "#=GF RN [1]\n"
        "#=GF RM 11469857\n"
        "#=GF RT TITLE1\n"
        "#=GF RA Auth1;\n"
        "#=GF RL J Mol Biol\n"
        "#=GF RN [2]\n"
        "#=GF RM 12007400\n"
        "#=GF RT TITLE2\n"
        "#=GF RA Auth2;\n"
        "#=GF RL Cell\n"
        "#=GF AC RF00360\n"
        "#=GF BM cmbuild -F CM SEED\n"
        "#=GF BM cmsearch -Z 274931 -E 1000000\n"
        "#=GF SQ 9\n"
        "seq1 ACC-G-GGTA\n"
        "seq2 TCC-G-GGCA\n"
        "//"
    )
    parsed = next(StockholmAlignment.from_file(handle, DNA))
    expected = StockholmAlignment(self.seqs, self.GF, {}, {}, {})
    self.assertEqual(parsed, expected)
def setUp(self):
    """Build the fixtures shared by the Stockholm tests.

    Sets ``self.seqs`` (two DNA sequences, ids ``seq1`` and ``seq2``),
    the markup mappings ``self.GF``, ``self.GS``, ``self.GR`` and
    ``self.GC``, and ``self.st``, an alignment built from all of them.
    """
    self.seqs = [
        DNASequence("ACC-G-GGTA", id="seq1"),
        DNASequence("TCC-G-GGCA", id="seq2"),
    ]
    # "RN" is listed once. A repeated pair for an existing key would
    # only re-assign the value and leave the key where it first
    # appeared, so the mapping is the same without the repetition.
    self.GF = OrderedDict([
        ("AC", "RF00360"),
        ("BM", ["cmbuild -F CM SEED",
                "cmsearch -Z 274931 -E 1000000"]),
        ("SQ", "9"),
        ("RT", ["TITLE1", "TITLE2"]),
        ("RN", ["[1]", "[2]"]),
        ("RA", ["Auth1;", "Auth2;"]),
        ("RL", ["J Mol Biol", "Cell"]),
        ("RM", ["11469857", "12007400"]),
    ])
    self.GS = {"AC": OrderedDict([("seq1", "111"), ("seq2", "222")])}
    self.GR = {"SS": OrderedDict([("seq1", "1110101111"),
                                  ("seq2", "0110101110")])}
    self.GC = {"SS_cons": "(((....)))"}
    self.st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                 gs=self.GS, gr=self.GR)