def test_save_load(self):
    """Round-trip a sequence through ``save``/``load`` and compare both objects."""
    csv_path = 'tests/files/test_save_load.csv'
    original = BioSeq.create_bio_seq("ACTGTCATAT")
    original.save(csv_path)
    restored = BioSeq.load(csv_path)

    # The reloaded object must agree on sequence, genetic code and class name.
    self.assertEqual(original.get_seq(), restored.get_seq())
    self.assertEqual(original.get_genetic_code(),
                     restored.get_genetic_code())
    self.assertEqual(original.__class__.__name__,
                     restored.__class__.__name__)
def test_eq(self):
    """Check ``==`` and ``!=`` between BioSeq objects."""
    reference = BioSeq("ATATAT", "DNA")
    twin = BioSeq("ATATAT", "DNA")
    self.assertTrue(reference == twin)
    self.assertFalse(reference != twin)

    longer = BioSeq("ATATATX", "DNA")
    # NOTE(review): constructed but never compared -- an assertion on a
    # sequence-type mismatch may be missing here; confirm intent.
    other_type = BioSeq("ATATAT", "PROTEIN")
    self.assertFalse(reference == longer)
def test_frequency(self):
    """``frequency()`` of a ten-symbol DNA string yields per-symbol fractions."""
    dna = BioSeq("AACCCCCTGG", "DNA")
    expected = {"A": 0.2, "C": 0.5, "T": 0.1, "G": 0.2}
    self.assertDictEqual(expected, dna.frequency())
def test_global_align(self):
    """Global alignment of PHSWG against HGWAG.

    Example from page 39 from slides.
    """
    seq_a = BioSeq("PHSWG", "PROTEIN")
    seq_b = BioSeq("HGWAG", "PROTEIN")
    blosum = read_substitution_matrix_file("test/blosum62.mat")

    scores, trace = seq_a.global_align_multiple_solutions(seq_b, blosum, -8)
    # Only the last row of the score matrix is checked.
    self.assertListEqual([-40, -24, -10, 3, 11, 9], scores[-1])

    alignments = list(
        seq_a.recover_global_align_multiple_solutions(seq_b, trace))
    self.assertListEqual([('PHSW_G', '_HGWAG')], alignments)
def test_validate(self):
    """``validate()`` accepts matching content and rejects DNA read into an RNA object."""
    dna_seq = BioSeq.create_bio_seq("ACTG")
    rna_seq = BioSeq.create_bio_seq("ACUG", "rna")
    for well_formed in (dna_seq, rna_seq):
        self.assertTrue(well_formed.validate())

    # seq.csv contains a DNA sequence
    rna_seq.read_sequence('tests/files/seq.csv')
    self.assertFalse(rna_seq.validate())
def test_recover_local_align_multiple_solutions(self):
    """Recover local alignments for three sequence pairs and check the results."""
    # Classes example
    first, second = self.slides_seq2, self.slides_seq1
    score, trace, _ = local_align_multiple_solutions(
        first, second, self.sm, -8)
    recovered = recover_local_align_multiple_solutions(
        score, trace, first, second)
    self.assertEqual(recovered, [['HSW', 'HGW'], ['HSWG', 'HGWA']])

    first, second = self.seq1, self.seq2
    score, trace, _ = local_align_multiple_solutions(
        first, second, self.sm_dna, -1)
    recovered = recover_local_align_multiple_solutions(
        score, trace, first, second)
    self.assertEqual(recovered, [['AT', 'AT'], ['CA', 'CA']])

    proteins = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
    first, second = proteins['sp|C1F111'], proteins['sp|B7JC18']
    score, trace, _ = local_align_multiple_solutions(
        first, second, self.sm, -3)
    recovered = recover_local_align_multiple_solutions(
        score, trace, first, second)
    # 4 local optimal alignments between sp|C1F111 & sp|B7JC18
    self.assertEqual(len(recovered), 4)

    print('>> Passed test_recover_local_align_multiple_solutions()')
def test_save_load(self):
    """Save a DNA sequence, load it into another object and compare both.

    The target object starts out as an empty RNA sequence, so the
    assertions only hold if ``load`` overwrites both ``seq_type`` and
    ``sequence``.
    """
    filename = "test_save_load.tmp"
    s = BioSeq("ATATACAGATGAT", "DNA")
    s.save(filename)
    # Remove the temporary file even when loading or an assertion fails,
    # so a failing run does not leave it behind.
    try:
        s2 = BioSeq("", "RNA")
        s2.load(filename)
        self.assertEqual(s.seq_type, s2.seq_type)
        self.assertEqual(s.sequence, s2.sequence)
    finally:
        os.remove(filename)
def test_gc_content(self):
    """``gc_content()`` for a mixed, an all-A and an all-GC sequence."""
    s = BioSeq("AACCCCCTGG", "DNA")
    # 7 of the 10 symbols are G or C. Compare with a tolerance because 0.70
    # is not exactly representable and the result depends on how the
    # fraction is computed.
    self.assertAlmostEqual(0.70, s.gc_content())

    # 0.0 and 1.0 are exactly representable, so exact equality is kept.
    s.sequence = "AA"
    self.assertEqual(0.0, s.gc_content())
    s.sequence = "GC"
    self.assertEqual(1.0, s.gc_content())
def test_read_write(self):
    """Write a sequence, read a longer one over it, then read the written one back."""
    written_path = 'tests/files/test_write.csv'
    seq = BioSeq.create_bio_seq("ACTGTCATAT")
    initial_length = len(seq)
    seq.write_sequence(written_path)

    # Reading seq.csv replaces the content with a longer sequence.
    seq.read_sequence('tests/files/seq.csv')
    self.assertGreater(len(seq), initial_length)
    self.assertEqual(2591, len(seq))

    # Reading the file written above restores the initial length.
    seq.read_sequence(written_path)
    self.assertEqual(initial_length, len(seq))
def test_getslice(self):
    """Slice a sequence via subscript syntax and via direct ``__getslice__`` calls."""
    dna = BioSeq("ATATAT", "DNA")

    # Subscript syntax, without and with an explicit step.
    self.assertEqual("A", dna[0:1])
    self.assertEqual("T", dna[-1:])
    self.assertEqual("AT", dna[0:2])
    self.assertEqual("A", dna[0:1:1])
    self.assertEqual("T", dna[-1::1])
    self.assertEqual("AAA", dna[0::2])

    # explicit invocation of function
    self.assertEqual("A", dna.__getslice__(0, 1))
    self.assertEqual("T", dna.__getslice__(-1, len(dna)))
    self.assertEqual("AT", dna.__getslice__(0, 2))
    self.assertEqual("A", dna.__getslice__(0, 1, 1))
    self.assertEqual("T", dna.__getslice__(-1, len(dna), 1))
    self.assertEqual("AAA", dna.__getslice__(0, len(dna), 2))
def test_dot_plot(self):
    """The sum over the ``dot_plot`` matrix is checked for two partner sequences."""
    base = BioSeq("ATAT", "DNA")

    identical = BioSeq("ATAT", "DNA")
    matrix = base.dot_plot(identical)
    self.assertEqual(matrix.sum(), 8)

    mostly_unknown = BioSeq("XXXT", "DNA")
    matrix = base.dot_plot(mostly_unknown)
    self.assertEqual(matrix.sum(), 2)
def test_global_align2(self):
    """Local alignment of PHSWG against HGWAG, plus the pairwise comparisons.

    Example from page 46 from slides. Despite the test name, the
    alignment exercised here is the local one.
    """
    seq_a = BioSeq("PHSWG", "PROTEIN")
    seq_b = BioSeq("HGWAG", "PROTEIN")
    blosum = read_substitution_matrix_file("test/blosum62.mat")

    scores, trace = seq_a.local_align_multiple_solutions(seq_b, blosum, -8)
    self.assertListEqual([0, 0, 6, 11, 19, 17], scores[-1])

    alignments = list(
        seq_a.recover_local_align_multiple_solutions(seq_b, trace, scores))
    self.assertListEqual([('HSW', 'HGW'), ('HSWG', 'HGWA')], alignments)

    # Both pairwise comparisons of the two sequences have length 2.
    global_cmp = BioSeq.compare_pairwise_global_align(
        [seq_a, seq_b], blosum, -8)
    self.assertEqual(len(global_cmp), 2)
    local_cmp = BioSeq.compare_pairwise_local_align(
        [seq_a, seq_b], blosum, -8)
    self.assertEqual(len(local_cmp), 2)
def test_compare_pairwise_num_local_align(self):
    """Spot-check selected entries of the ``compare_pairwise_num_local_align`` result."""
    fasta = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
    sequences = list(fasta.values())
    counts = compare_pairwise_num_local_align(sequences, self.sm, -3)

    # Some random values
    # Between sp|C1F111 & sp|B7JC18 - matches previous test
    self.assertEqual(counts[4][3], 4)
    self.assertEqual(counts[0][0], 1)
    self.assertEqual(counts[9][6], 144)
    self.assertEqual(counts[8][10], 1152)

    print('>> Passed test_compare_num_pairwise_local_align()')
def test_recover_global_align_multiple_solutions(self):
    """Recover global alignments for three sequence pairs and check the results.

    Membership checks use ``assertIn`` so that a failure reports the
    missing alignment together with the list that was searched.
    """
    # Classes Example
    _, ga_trace = global_align_multiple_solutions(
        self.slides_seq1, self.slides_seq2, self.sm, -3)
    rga = recover_global_align_multiple_solutions(
        ga_trace, self.slides_seq1, self.slides_seq2)
    seq1_alignments = [align[0] for align in rga]
    seq2_alignments = [align[1] for align in rga]
    self.assertIn('-HGWAG', seq1_alignments)
    self.assertIn('PHSW-G', seq2_alignments)

    # C2 example
    _, ga_trace = global_align_multiple_solutions(
        self.seq1, self.seq2, self.sm_dna, -1)
    rga = recover_global_align_multiple_solutions(
        ga_trace, self.seq1, self.seq2)
    seq1_alignments = [align[0] for align in rga]
    seq2_alignments = [align[1] for align in rga]
    self.assertEqual(len(seq1_alignments), 3)
    self.assertIn('G-ATTACA', seq1_alignments)
    self.assertIn('GCA-TGCT', seq2_alignments)
    self.assertIn('GCAT-GCT', seq2_alignments)
    self.assertIn('GCATG-CT', seq2_alignments)

    seqs = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
    _, ga_trace = global_align_multiple_solutions(
        seqs['sp|C1F111'], seqs['sp|B7JC18'], self.sm, -3)
    rga = recover_global_align_multiple_solutions(
        ga_trace, seqs['sp|C1F111'], seqs['sp|B7JC18'])
    # 5760 global optimal alignments between sp|C1F111 & sp|B7JC18
    self.assertEqual(len(rga), 5760)

    print('>> Passed test_recover_global_align_multiple_solutions()')
def test_basics(self):
    """A created sequence reports its raw string and that string's length."""
    raw = "ACTGTCATGAT"
    bio = BioSeq.create_bio_seq(raw)
    self.assertEqual(bio.get_seq(), raw)
    self.assertEqual(len(bio), len(raw))
def test_rev_complement(self):
    """``reverse_complement()`` for one DNA and one RNA sequence."""
    dna = BioSeq.create_bio_seq("ACGGTA")
    self.assertEqual(dna.reverse_complement(), "TACCGT")

    rna = BioSeq.create_bio_seq("ACGUUA", "rna")
    self.assertEqual(rna.reverse_complement(), "UAACGU")
def test_gc_percent(self):
    """``gc_percent()`` on the whole sequence and ``gc_percent_sub_seq(2)``."""
    sample = BioSeq.create_bio_seq("ACGG")
    whole = sample.gc_percent()
    self.assertEqual(whole, 0.75)
    per_window = sample.gc_percent_sub_seq(2)
    self.assertEqual(per_window, [0.5, 1])
def test_score_seq(self):
    """``score_seq`` of ATAT against ATAG with a generated substitution matrix."""
    matrix = substitution_matrix("ATCG", 2, -3)
    left = BioSeq("ATAT", "DNA")
    right = BioSeq("ATAG", "DNA")
    score = left.score_seq(right, matrix, 3)
    self.assertEqual(3, score)
def test_str(self):
    """``str()`` of a BioSeq gives back the sequence string."""
    dna = BioSeq("ATATAT", "DNA")
    rendered = str(dna)
    self.assertEqual("ATATAT", rendered)
def test_pretty_print(self):
    """Delegate to the shared ``_test_pretty_print`` helper with a DNA sequence."""
    dna = BioSeq("ATATACAGATGAT", "DNA")
    self._test_pretty_print(dna)
def test_constructor(self):
    """The constructor upper-cases both arguments and rejects an unknown type."""
    built = BioSeq("ATATat", "DNa")
    self.assertIsInstance(built, BioSeq)
    self.assertEqual("ATATAT", built.sequence)
    self.assertEqual("DNA", built.seq_type)

    with self.assertRaises(Exception):
        BioSeq("ATAT", "smth")
def test_assert_seq_type(self):
    """The three known sequence types construct; an unknown one raises."""
    # Construction alone is the check: none of these may raise.
    for known_type in ("DNA", "RNA", "PROTEIN"):
        s = BioSeq("ATATAT", known_type)

    with self.assertRaises(Exception):
        BioSeq("ATATA", "asd")
def test_getitem(self):
    """Index the first and the last symbol of a sequence."""
    dna = BioSeq("ATATAT", "DNA")
    first_symbol = dna[0]
    self.assertEqual("A", first_symbol)
    last_symbol = dna[-1]
    self.assertEqual("T", last_symbol)
def test_repr(self):
    """A list holding a BioSeq renders as the bracketed sequence string."""
    # str() of a list uses repr() of its elements, which is what is under test.
    wrapped = [BioSeq("ATATAT", "DNA")]
    rendered = str(wrapped)
    self.assertEqual("[ATATAT]", rendered)
def test_freq(self):
    """``freq_symbols()`` returns the absolute count of each symbol."""
    bio = BioSeq.create_bio_seq("ACTGTCATAT")
    expected_counts = {"T": 4, "A": 3, "C": 2, "G": 1}
    self.assertEqual(bio.freq_symbols(), expected_counts)
def test_score_affine_gap(self):
    """``score_affine_gap`` for a gapped protein pair scored with blosum62."""
    blosum = read_substitution_matrix_file("test/blosum62.mat")
    ungapped = BioSeq("LGPSSGCASRIWTKSA", "PROTEIN")
    gapped = BioSeq("TGPS_G__S_IWSKSG", "PROTEIN")
    score = ungapped.score_affine_gap(gapped, blosum, -8, -2)
    self.assertEqual(33, score)
def test_len(self):
    """``len()`` of a BioSeq equals the number of symbols."""
    dna = BioSeq("ATATAT", "DNA")
    symbol_count = len(dna)
    self.assertEqual(6, symbol_count)
def test_add_gap(self):
    """``add_gap(2)`` inserts a ``_`` at index 2 of the sequence."""
    dna = BioSeq("ATGC", "DNA")
    before = str(dna)
    self.assertEqual("ATGC", before)

    dna.add_gap(2)
    after = str(dna)
    self.assertEqual("AT_GC", after)
# Interactive demo fragment; `align`, `wait_input` and `sm_blosum` are bound
# outside the visible code. Statement order, pausing on wait_input() between steps:
#   1. print the two members of the current alignment pair;
#   2. load 'tests/files/protein_sequences.fas' with BioSeq.read_fasta_file
#      and print every key/value pair;
#   3. run global_align_multiple_solutions on sp|B0C882 and sp|A1TQI0 with
#      sm_blosum and a gap of -3, then announce the score matrix.
# NOTE(review): the prompt text names 'sm_dna', but the call passes
# sm_blosum -- confirm which matrix the message should mention.
print('Seq1: ' + str(align[0]) + '\nSeq2: ' + str(align[1]) + '\n') wait_input() print("\n-------------------------\n") print("Now shall we make it a little bit more complex?\n") wait_input() print( "So, lets load and use the proteins present in the 'tests/files/protein_sequences.fas' file!\n\ We need to use the read fasta functionality from the bioseq library to load them.\n" ) wait_input() print("The protein sequences are:\n") seqs = BioSeq.read_fasta_file('tests/files/protein_sequences.fas') for key, value in seqs.items(): print("> " + str(key) + ": " + str(value) + "\n") wait_input() print("::: GLOBAL ALIGNMENT with multiple solutions :::\n") print( "\nLets make the global alignment with two of our protein sequences: sp|B0C882: & sp|A1TQI0, the 'sm_dna' and a gap of -3, shall we?\n" ) p_seq1 = seqs["sp|B0C882"] p_seq2 = seqs["sp|A1TQI0"] wait_input() ga_score, ga_trace = global_align_multiple_solutions( p_seq1, p_seq2, sm_blosum, -3) print("Score matrix obtained:\n")
def test_hamming_distance(self):
    """Hamming distance is symmetric and raises for sequences of unequal length."""
    reference = BioSeq("ATATACAGATGAT", "DNA")

    # Identical sequences are at distance 0, in both directions.
    identical = BioSeq("ATATACAGATGAT", "DNA")
    self.assertEqual(0, reference.hamming_distance(identical))
    self.assertEqual(0, identical.hamming_distance(reference))

    # Only the last symbol differs.
    one_off = BioSeq("ATATACAGATGAX", "DNA")
    self.assertEqual(1, reference.hamming_distance(one_off))
    self.assertEqual(1, one_off.hamming_distance(reference))

    shorter = BioSeq("AAA", "DNA")
    with self.assertRaises(Exception):
        reference.hamming_distance(shorter)