def test_rna_translate_rosalind_data():
    """Run rna_translate on the Rosalind sample data and print the outcome.

    Reads ``rosalind_data.txt`` from the current working directory. The first
    line is the RNA string handed to ``rna_translate``; the second line is
    compared, verbatim, with the value it returns. The comparison result is
    printed as ``Result: True`` / ``Result: False``; nothing is asserted.
    """
    print("Positive Test case #1: Rosalind Sample Data")

    # The context manager closes the handle even if reading raises.
    with open("rosalind_data.txt", "r") as rosalind_data_file:
        rosalind_data_list = rosalind_data_file.readlines()

    # Strip only the line terminator; a blind [:-1] would drop a real base
    # when the line has no trailing newline.
    rna = rosalind_data_list[0].rstrip("\n")

    print("Result: " + str(rna_translate(rna) == rosalind_data_list[1]))
    print()
def test_rna_translate_no_stop():
    """Check that rna_translate reports a missing stop codon and print the outcome.

    Reads the RNA string from the first line of ``rosalind_data.txt`` (current
    working directory), drops its last three characters (the stop codon) and
    compares the value returned by ``rna_translate`` with the expected error
    string. The comparison result is printed as ``Result: True`` /
    ``Result: False``; nothing is asserted.
    """
    print("Negative Test case #1: No stop codon")

    # The context manager closes the handle even if reading raises.
    with open("rosalind_data.txt", "r") as rosalind_data_file:
        rosalind_data_list = rosalind_data_file.readlines()

    # Strip only the line terminator; a blind [:-1] would drop a real base
    # when the line has no trailing newline.
    rna = rosalind_data_list[0].rstrip("\n")
    rna_without_stop = rna[:-3]  # remove the stop codon from the end

    expected_output = "No valid stop codon found"  # returned as an error string
    print("Result: " + str(rna_translate(rna_without_stop) == expected_output))
    print()
def test_rna_translate_invalid_codon():
    """Check that rna_translate skips an invalid codon and print the outcome.

    Reads ``rosalind_data.txt`` from the current working directory, prefixes
    the RNA string on its first line with the nonsense codon ``"ABC"`` and
    compares the value returned by ``rna_translate`` with the file's second
    line, verbatim. The comparison result is printed as ``Result: True`` /
    ``Result: False``; nothing is asserted.
    """
    print("Negative Test case #2: Invalid Codon")
    print("Note: Should see a warning about an invalid codon")

    # The context manager closes the handle even if reading raises.
    with open("rosalind_data.txt", "r") as rosalind_data_file:
        rosalind_data_list = rosalind_data_file.readlines()

    # Strip only the line terminator; a blind [:-1] would drop a real base
    # when the line has no trailing newline.
    rna = rosalind_data_list[0].rstrip("\n")
    rna_with_bad_codon = "ABC" + rna  # add a nonsense codon to the beginning

    # The translation is expected to ignore the invalid codon.
    output = rna_translate(rna_with_bad_codon)
    print("Result: " + str(output == rosalind_data_list[1]))
    print()
def translate_RNA_splice(file):
    """(file) -> str

    Return the protein translation of a DNA sequence after its introns have
    been spliced out.

    ``file`` is the path of a FASTA file. The longest record is taken to be
    the DNA sequence (the first one wins on a tie) and every other record is
    treated as an intron. Each intron is removed at its first occurrence in
    the DNA, the remaining exons are concatenated, the result is upper-cased
    and transcribed to RNA (T -> U), and the RNA is passed to
    ``rna_translate.rna_translate``. A file with no intron records is
    translated unspliced.

    Raises:
        ValueError: if sequence data appears before the first ``>`` header,
            if the file holds no records, or if an intron does not occur in
            the DNA sequence.

    >>> translate_RNA_splice('splice_example.txt')
    'MVYIADKQHVASREAYGHMFKVCA*'
    """
    # Collapse each (possibly multi-line) FASTA record into a single string,
    # keyed by its header. A repeated header restarts that record.
    records = {}
    current_id = None
    with open(file, 'r') as fasta:
        for line in fasta:
            line = line.rstrip()
            if not line:
                continue
            if line.startswith('>'):
                current_id = line[1:]
                records[current_id] = []
            elif current_id is None:
                raise ValueError(
                    'sequence data found before the first FASTA header')
            else:
                records[current_id].append(line)

    sequences = [''.join(chunks) for chunks in records.values()]
    if not sequences:
        raise ValueError('no FASTA records found in {!r}'.format(file))

    # The DNA is the longest record; max() returns the first one on a tie.
    dna_index = max(range(len(sequences)), key=lambda i: len(sequences[i]))
    dna = sequences[dna_index]
    introns = sequences[:dna_index] + sequences[dna_index + 1:]

    # Half-open (start, end) span of each intron, in order of appearance.
    # str.index raises ValueError when an intron is absent from the DNA.
    intron_spans = []
    for intron in introns:
        start = dna.index(intron)
        intron_spans.append((start, start + len(intron)))
    intron_spans.sort()

    # Walk the DNA once, keeping everything that lies between introns.
    # With no introns the loop is skipped and the whole DNA is kept.
    exons = []
    cursor = 0
    for start, end in intron_spans:
        exons.append(dna[cursor:start])
        cursor = end
    exons.append(dna[cursor:])
    spliced_dna = ''.join(exons)

    rna = spliced_dna.upper().replace('T', 'U')

    # Project-local translator, imported lazily as in the original code.
    import rna_translate
    return rna_translate.rna_translate(rna)