def test_recover_local_align_multiple_solutions(self):
    """Check the alignments rebuilt by recover_local_align_multiple_solutions.

    Three scenarios are exercised: the slides sequences scored with
    ``self.sm``, the ``seq1``/``seq2`` pair scored with ``self.sm_dna``,
    and two entries read from the FASTA fixture file. In each case the
    score and trace matrices come from local_align_multiple_solutions
    (its third return value is ignored here).
    """
    # Classes example
    first, second = self.slides_seq2, self.slides_seq1
    score_mat, trace_mat, _ = local_align_multiple_solutions(
        first, second, self.sm, -8)
    recovered = recover_local_align_multiple_solutions(
        score_mat, trace_mat, first, second)
    self.assertEqual(recovered, [['HSW', 'HGW'], ['HSWG', 'HGWA']])

    first, second = self.seq1, self.seq2
    score_mat, trace_mat, _ = local_align_multiple_solutions(
        first, second, self.sm_dna, -1)
    recovered = recover_local_align_multiple_solutions(
        score_mat, trace_mat, first, second)
    self.assertEqual(recovered, [['AT', 'AT'], ['CA', 'CA']])

    fasta = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
    first, second = fasta['sp|C1F111'], fasta['sp|B7JC18']
    score_mat, trace_mat, _ = local_align_multiple_solutions(
        first, second, self.sm, -3)
    recovered = recover_local_align_multiple_solutions(
        score_mat, trace_mat, first, second)
    # 4 local optimal alignments between sp|C1F111 & sp|B7JC18
    self.assertEqual(len(recovered), 4)

    print('>> Passed test_recover_local_align_multiple_solutions()')
def test_compare_pairwise_num_local_align(self):
    """Spot-check the table returned by compare_pairwise_num_local_align.

    Every sequence in the FASTA fixture file is loaded (in file order, as
    a list) and passed to compare_pairwise_num_local_align together with
    ``self.sm`` and -3. A handful of entries of the resulting
    two-dimensional table are then compared against known values.
    """
    seqs = list(
        BioSeq.read_fasta_file('tests/files/protein_sequences.fas').values())
    cla = compare_pairwise_num_local_align(seqs, self.sm, -3)

    # Some random values
    # Between sp|C1F111 & sp|B7JC18 - same count as asserted in
    # test_recover_local_align_multiple_solutions
    self.assertEqual(cla[4][3], 4)
    self.assertEqual(cla[0][0], 1)
    self.assertEqual(cla[9][6], 144)
    self.assertEqual(cla[8][10], 1152)

    # The progress message now names this test method exactly; it
    # previously printed "test_compare_num_pairwise_local_align()".
    print('>> Passed test_compare_pairwise_num_local_align()')
def test_recover_global_align_multiple_solutions(self):
    """Check the alignments rebuilt by recover_global_align_multiple_solutions.

    For each scenario the trace matrix is taken from
    global_align_multiple_solutions (the first return value is ignored)
    and handed to the recovery function. Each recovered alignment is
    indexed as a pair: element 0 is the aligned first sequence, element 1
    the aligned second sequence.

    Membership is checked with ``assertIn`` so that a failure reports the
    missing alignment and the list that was searched.
    """
    # Classes Example
    _, ga_trace = global_align_multiple_solutions(
        self.slides_seq1, self.slides_seq2, self.sm, -3)
    rga = recover_global_align_multiple_solutions(
        ga_trace, self.slides_seq1, self.slides_seq2)
    seq1_alignments = [align[0] for align in rga]
    seq2_alignments = [align[1] for align in rga]
    self.assertIn('-HGWAG', seq1_alignments)
    self.assertIn('PHSW-G', seq2_alignments)

    # C2 example
    _, ga_trace = global_align_multiple_solutions(
        self.seq1, self.seq2, self.sm_dna, -1)
    rga = recover_global_align_multiple_solutions(
        ga_trace, self.seq1, self.seq2)
    seq1_alignments = [align[0] for align in rga]
    seq2_alignments = [align[1] for align in rga]
    self.assertEqual(len(seq1_alignments), 3)
    self.assertIn('G-ATTACA', seq1_alignments)
    self.assertIn('GCA-TGCT', seq2_alignments)
    self.assertIn('GCAT-GCT', seq2_alignments)
    self.assertIn('GCATG-CT', seq2_alignments)

    seqs = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
    _, ga_trace = global_align_multiple_solutions(
        seqs['sp|C1F111'], seqs['sp|B7JC18'], self.sm, -3)
    rga = recover_global_align_multiple_solutions(
        ga_trace, seqs['sp|C1F111'], seqs['sp|B7JC18'])
    # 5760 global optimal alignments between sp|C1F111 & sp|B7JC18
    self.assertEqual(len(rga), 5760)

    print('>> Passed test_recover_global_align_multiple_solutions()')
print('Seq1: ' + str(align[0]) + '\nSeq2: ' + str(align[1]) + '\n') wait_input() print("\n-------------------------\n") print("Now shall we make it a little bit more complex?\n") wait_input() print( "So, lets load and use the proteins present in the 'tests/files/protein_sequences.fas' file!\n\ We need to use the read fasta functionality from the bioseq library to load them.\n" ) wait_input() print("The protein sequences are:\n") seqs = BioSeq.read_fasta_file('tests/files/protein_sequences.fas') for key, value in seqs.items(): print("> " + str(key) + ": " + str(value) + "\n") wait_input() print("::: GLOBAL ALIGNMENT with multiple solutions :::\n") print( "\nLets make the global alignment with two of our protein sequences: sp|B0C882: & sp|A1TQI0, the 'sm_dna' and a gap of -3, shall we?\n" ) p_seq1 = seqs["sp|B0C882"] p_seq2 = seqs["sp|A1TQI0"] wait_input() ga_score, ga_trace = global_align_multiple_solutions( p_seq1, p_seq2, sm_blosum, -3) print("Score matrix obtained:\n")