def problem_prot():
    '''
    Driver for the Rosalind "prot" problem.

    http://rosalind.info/problems/prot/

    Opens the dataset 'rosalind_prot.txt' through readfile, passes each
    line to prot and hands every result to lineoutput.
    '''
    from solutions.prot import prot

    # NOTE(review): readfile is defined outside this block; it is used here
    # both as a context manager and as an iterable of lines.
    dataset = readfile('rosalind_prot.txt')
    with dataset:
        for record in dataset:
            translated = prot(record)
            lineoutput(translated)
def orf(dna):
    '''
    Open reading Frames

    Given: A DNA string s of length at most 1 kbp in FASTA format.

    Return: Every distinct candidate protein string that can be translated
    from ORFs of s. Strings can be returned in any order.

    A candidate is reported only when it starts at an 'M' and is terminated
    by a stop marker ('$') further along the same reading frame; a start
    with no stop after it is not an open reading frame and is skipped.

    NOTE(review): relies on prot(frame, stop=False) translating the whole
    frame and marking stop codons with '$' -- confirm against prot.

    >>> dna = ('AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAG'
    ...        'AGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG')
    >>> results = orf(dna)
    >>> for r in sorted(results):
    ...     print(r)
    M
    MGMTPRLGLESLLE
    MLLGSFRLIPKETLIQVAGSSPCNLS
    MTPRLGLESLLE
    '''
    # Both strands are scanned: the given one and its reverse complement.
    strands = [rna(dna), rna(revc(dna))]

    # Three reading frames per strand, obtained by dropping 0, 1 or 2 bases.
    frames = [strand[offset:] for strand in strands for offset in range(3)]

    candidates = set()
    for frame in frames:
        protein = prot(frame, stop=False)
        for start, residue in enumerate(protein):
            if residue != 'M':
                continue
            # Search from `start` directly instead of slicing a copy.
            end = protein.find('$', start)
            if end == -1:
                # No stop marker from here on, so no later start in this
                # frame can be terminated either.  (Previously a -1 here
                # produced the slice protein[start:start - 1], which for
                # start == 0 reported the frame minus its last residue.)
                break
            candidates.add(protein[start:end])
    return list(candidates)
def splc(dna, introns):
    '''
    RNA Splicing

    Given: A DNA string s (of length at most 1 kbp) and a collection of
    substrings of s acting as introns. All strings are given in FASTA
    format.

    Return: A protein string resulting from transcribing and translating
    the exons of s. (Note: Only one solution will exist for the dataset
    provided.)

    Every occurrence of each intron is removed from the DNA, longest
    intron first, and the remainder is passed through rna and then prot.
    An empty intron removes nothing.

    >>> dna = ('ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATC'
    ...        'TCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGT'
    ...        'TTGCGCCTAG')
    >>> introns = ['ATCGGTCGAA', 'ATCGGTCGAGCGTGT']
    >>> splc(dna, introns)
    'MVYIADKQHVASREAYGHMFKVCA'
    '''
    # Longest first, so an intron that contains a shorter one is cut out
    # whole before the shorter one could break it apart.
    for intron in sorted(introns, key=len, reverse=True):
        # str.replace instead of ''.join(dna.split(intron)): same result for
        # non-empty introns, and an empty intron is a no-op rather than a
        # "ValueError: empty separator".
        dna = dna.replace(intron, '')
    return prot(rna(dna))