def main(): import argparse # Create a command-line parser object parser = argparse.ArgumentParser() default_start_codons = ['AUG'] default_stop_codons = ['UAA', 'UAG', 'UGA'] # Tell the parser what command-line arguments this script can receive parser.add_argument('sequence', metavar = 'SEQUENCE', type = str, help = ("The sequence to search for an open reading frame to be translated. " "If the path flag (\'-p\'/\'--path\') is specified, " "then this should be a path to a file containing the " "sequence to be searched.")) parser.add_argument('-p', '--path', action = 'store_true', help = ('The sequence argument should be treated as a path to a ' 'file containing the sequence to be searched.')) parser.add_argument('-s', '--start-codon', type = str, action = 'append', #append each argument to a list default = None, help = ('A start codon. This option can be used multiple times ' 'if htere are multiple start codons. ' 'Default: {0}.'.format(" ".join(default_start_codons)))) parser.add_argument('-x', '--stop-codon', type = str, action = 'append', # append each argument to a list default = None, help = ('A stop codon. This option can be used multiple times ' 'if there are multiple stop codons. ' 'Default: {0}.'.format(" ".join(default_stop_codons)))) # Parse the command-line arguments into a 'dict'-like container args = parser.parse_args() # Check to see if the path option was set to True by the caller. If so, parse the sequence from the path. if args.path: sequence = find_orf.parse_sequence_from_path(args.sequence) else: sequence = args.sequence # Check to see if start/stop codons were provided by the caller. If not, use the defaults. if not args.start_codon: args.start_codon = default_start_codons if not args.stop_codon: args.stop_codon = default_stop_codons genetic_code = {'GUC': 'V', 'ACC': 'T', 'GUA': 'V', 'GUG': 'V', 'ACU': 'T', 'AAC': 'N', 'CCU': 'P', 'UGG': 'W', 'AGC': 'S', 'AUC': 'I', 'CAU': 'H', 'AAU': 'N', 'AGU': 'S', 'GUU': 'V', 'CAC': 'H', 'ACG': 'T', 'CCG': 'P', 'CCA': 'P', 'ACA': 'T', 'CCC': 'P', 'UGU': 'C', 'GGU': 'G', 'UCU': 'S', 'GCG': 'A', 'UGC': 'C', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 'UCG': 'S', 'AGG': 'R', 'GGG': 'G', 'UCC': 'S', 'UCA': 'S', 'UAA': '*', 'GGA': 'G', 'UAC': 'Y', 'GAC': 'D', 'UAG': '*', 'AUA': 'I', 'GCA': 'A', 'CUU': 'L', 'GGC': 'G', 'AUG': 'M', 'CUG': 'L', 'GAG': 'E', 'CUC': 'L', 'AGA': 'R', 'CUA': 'L', 'GCC': 'A', 'AAA': 'K', 'AAG': 'K', 'CAA': 'Q', 'UUU': 'F', 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'GCU': 'A', 'GAA': 'E', 'AUU': 'I', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F'} orf = find_orf.find_first_orf(sequence = sequence, start_codons = args.start_codon, stop_codons = args.stop_codon) translated_orf = translate.translate_sequence(orf, genetic_code) sys.stdout.write('{}\n'.format(translated_orf))
def main(): import argparse from find_orf import find_first_orf from translate import translate_sequence from find_orf import parse_sequence_from_path # Create a command-line parser object parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) # Tell the parser what command-line arguments this script can receive parser.add_argument( 'sequence', metavar='SEQUENCE', type=str, help=('The sequence to search for an open-reading frame. ' 'If the path flag (\'-p\'/\'--path\') is specified, ' 'then this should be a path to a file containing the ' 'sequence to be searched.')) parser.add_argument( '-p', '--path', action='store_true', help=('The sequence argument should be treated as a path to a ' 'containing the sequence to be searched.')) parser.add_argument( '-s', '--start-codons', type=str, nargs='+', # one or more arguments default=['AUG'], help=('One or more possible start codons.')) parser.add_argument( '-x', '--stop-codons', type=str, nargs='+', # one or more arguments default=['UAA', 'UAG', 'UGA'], help=('One or more possible stop codons.')) # Parse the command-line arguments into a 'dict'-like container args = parser.parse_args() # Check to see if the path option was set to True by the caller. If so, parse # the sequence from the path if args.path: sequence = parse_sequence_from_path(args.sequence) else: sequence = args.sequence orf = find_first_orf(sequence=sequence, start_codons=args.start_codons, stop_codons=args.stop_codons) translation = translate_sequence(orf) sys.stdout.write('{}\n'.format(translation))
def main(): orf_parse = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter) # Tell the parser what command-line arguments this script can receive parser.add_argument('sequence', metavar = 'SEQUENCE', type = str, help = ('The sequence to search for an open-reading frame. ' 'If the path flag (\'-p\'/\'--path\') is specified, ' 'then this should be a path to a file containing the ' 'sequence to be searched.')) parser.add_argument('-p', '--path', action = 'store_true', help = ('The sequence argument should be treated as a path to a ' 'containing the sequence to be searched.')) parser.add_argument('-s', '--start-codons', type = str, nargs = '+', # one or more arguments default = ['AUG'], help = ('One or more possible start codons.')) parser.add_argument('-x', '--stop-codons', type = str, nargs = '+', # one or more arguments default = ['UAA', 'UAG', 'UGA'], help = ('One or more possible stop codons.')) # Parse the command-line arguments into a 'dict'-like container args = parser.parse_args() # Check to see if the path option was set to True by the caller. If so, parse # the sequence from the path if args.path: sequence = parse_sequence_from_path(args.sequence) #NEW else: sequence = args.sequence orf = find_orf.find_first_orf(sequence = sequence, start_codons = args.start_codons, stop_codons = args.stop_codons) genetic_code = {'GUC': 'V', 'ACC': 'T', 'GUA': 'V', 'GUG': 'V', 'ACU': 'T', 'AAC': 'N', 'CCU': 'P', 'UGG': 'W', 'AGC': 'S', 'AUC': 'I', 'CAU': 'H', 'AAU': 'N', 'AGU': 'S', 'GUU': 'V', 'CAC': 'H', 'ACG': 'T', 'CCG': 'P', 'CCA': 'P', 'ACA': 'T', 'CCC': 'P', 'UGU': 'C', 'GGU': 'G', 'UCU': 'S', 'GCG': 'A', 'UGC': 'C', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 'UCG': 'S', 'AGG': 'R', 'GGG': 'G', 'UCC': 'S', 'UCA': 'S', 'UAA': '*', 'GGA': 'G', 'UAC': 'Y', 'GAC': 'D', 'UAG': '*', 'AUA': 'I', 'GCA': 'A', 'CUU': 'L','GGC': 'G', 'AUG': 'M', 'CUG': 'L', 'GAG': 'E', 'CUC': 'L', 'AGA': 'R', 'CUA': 'L', 'GCC': 'A', 'AAA': 'K', 'AAG': 'K', 'CAA': 'Q', 'UUU': 'F', 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'GCU': 'A', 'GAA': 'E', 'AUU': 'I', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F'} translation = translate.translate_sequence(rna_sequence = orf, genetic_code = genetic_code) sys.stdout.write('{}\n'.format(translating_time)) if __name__ == '__main__': main()
def main(): import argparse import find_orf import translate parser = argparse.ArgumentParser( formatter_class = argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('sequence', metavar = 'SEQUENCE', type = str, help = ('The sequence to search for an open-reading frame. ' 'If the path flag (\'-p\'/\'--path\') is specified, ' 'then this should be a path to a file containing the ' 'sequence to be searched.')) parser.add_argument('-p', '--path', action = 'store_true', help = ('The sequence argument should be treated as a path to a ' 'containing the sequence to be searched.')) parser.add_argument('-s', '--start-codons', type = str, nargs = '+', # one or more arguments default = ['AUG'], help = ('One or more possible start codons.')) parser.add_argument('-x', '--stop-codons', type = str, nargs = '+', # one or more arguments default = ['UAA', 'UAG', 'UGA'], help = ('One or more possible stop codons.')) args = parser.parse_args() if args.path: sequence = parse_sequence_from_path(args.sequence) else: sequence = args.sequence orf = find_orf.find_first_orf(sequence = sequence, start_codons = args.start_codons, stop_codons = args.stop_codons) genetic_code = {'GUC': 'V', 'ACC': 'T', 'GUA': 'V', 'GUG': 'V', 'ACU': 'T', 'AAC': 'N', 'CCU': 'P', 'UGG': 'W', 'AGC': 'S', 'AUC': 'I', 'CAU': 'H', 'AAU': 'N', 'AGU': 'S', 'GUU': 'V', 'CAC': 'H', 'ACG': 'T', 'CCG': 'P', 'CCA': 'P', 'ACA': 'T', 'CCC': 'P', 'UGU': 'C', 'GGU': 'G', 'UCU': 'S', 'GCG': 'A', 'UGC': 'C', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 'UCG': 'S', 'AGG': 'R', 'GGG': 'G', 'UCC': 'S', 'UCA': 'S', 'UAA': '*', 'GGA': 'G', 'UAC': 'Y', 'GAC': 'D', 'UAG': '*', 'AUA': 'I', 'GCA': 'A', 'CUU': 'L', 'GGC': 'G', 'AUG': 'M', 'CUG': 'L', 'GAG': 'E', 'CUC': 'L', 'AGA': 'R', 'CUA': 'L', 'GCC': 'A', 'AAA': 'K', 'AAG': 'K', 'CAA': 'Q', 'UUU': 'F', 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'GCU': 'A', 'GAA': 'E', 'AUU': 'I', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F'} translation = translate.translate_sequence(rna_sequence = orf, genetic_code = genetic_code) sys.stdout.write('{}\n'.format(translation))
def main(): import argparse # Create a command-line parser object parser = argparse.ArgumentParser() default_start_codons = ['AUG'] default_stop_codons = ['UAA', 'UAG', 'UGA'] # Tell the parser what command-line arguments this script can receive parser.add_argument( 'sequence', metavar='SEQUENCE', type=str, help=('The sequence to search for an open-reading frame. ' 'If the path flag (\'-p\'/\'--path\') is specified, ' 'then this should be a path to a file containing the ' 'sequence to be searched.')) parser.add_argument( '-p', '--path', action='store_true', help=('The sequence argument should be treated as a path to a ' 'containing the sequence to be searched.')) parser.add_argument( '-s', '--start-codon', type=str, action='append', # append each argument to a list default=None, help=('A start codon. This option can be used multiple times ' 'if there are multiple start codons. ' 'Default: {0}.'.format(" ".join(default_start_codons)))) parser.add_argument( '-x', '--stop-codon', type=str, action='append', # append each argument to a list default=None, help=('A stop codon. This option can be used multiple times ' 'if there are multiple stop codons. ' 'Default: {0}.'.format(" ".join(default_stop_codons)))) # Parse the command-line arguments into a 'dict'-like container args = parser.parse_args() # Check to see if the path option was set to True by the caller. If so, parse # the sequence from the path if args.path: sequence = find_orf.parse_sequence_from_path(args.sequence) else: sequence = args.sequence # Check to see if start/stop codons were provided by the caller. If not, # use the defaults. if not args.start_codon: args.start_codon = default_start_codons if not args.stop_codon: args.stop_codon = default_stop_codons orf = find_orf.find_first_orf(sequence=sequence, start_codons=args.start_codon, stop_codons=args.stop_codon) aa = find_translate_orf(sequence=sequence, start_codons=args.start_codon, stop_codons=args.stop_codon) sys.stdout.write('{}\n'.format(aa))
def find_translate_orf( sequence, start_codons=['AUG'], stop_codons=['UAA', 'UAG', 'UGA'], ): ''' Find and translate the first open reading frame of a DNA or RNA sequence. If there are no ORFs in the sequence, an empty string is returned. Parameters ---------- sequence : str A string of DNA or RNA sequence (upper or lower-case) start_codons : list of strings All possible start codons. Each codon must be a string of 3 RNA bases, upper or lower-case. stop_codons : list of strings All possible stop codons. Each codon must be a string of 3 RNA bases, upper or lower-case. Returns ------- str A string of amino acids encoded by DNA or RNA string, starting with a start codon and ending with the first stop codon. ''' genetic_code = { 'GUC': 'V', 'ACC': 'T', 'GUA': 'V', 'GUG': 'V', 'ACU': 'T', 'AAC': 'N', 'CCU': 'P', 'UGG': 'W', 'AGC': 'S', 'AUC': 'I', 'CAU': 'H', 'AAU': 'N', 'AGU': 'S', 'GUU': 'V', 'CAC': 'H', 'ACG': 'T', 'CCG': 'P', 'CCA': 'P', 'ACA': 'T', 'CCC': 'P', 'UGU': 'C', 'GGU': 'G', 'UCU': 'S', 'GCG': 'A', 'UGC': 'C', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 'UCG': 'S', 'AGG': 'R', 'GGG': 'G', 'UCC': 'S', 'UCA': 'S', 'UAA': '*', 'GGA': 'G', 'UAC': 'Y', 'GAC': 'D', 'UAG': '*', 'AUA': 'I', 'GCA': 'A', 'CUU': 'L', 'GGC': 'G', 'AUG': 'M', 'CUG': 'L', 'GAG': 'E', 'CUC': 'L', 'AGA': 'R', 'CUA': 'L', 'GCC': 'A', 'AAA': 'K', 'AAG': 'K', 'CAA': 'Q', 'UUU': 'F', 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'GCU': 'A', 'GAA': 'E', 'AUU': 'I', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F' } #find_orf.find_first_orf('sequence', ['AUG'], ['UAA']) orf = find_orf.find_first_orf(sequence, start_codons=start_codons, stop_codons=stop_codons) aa = translate.translate_sequence(orf, genetic_code) return aa
def translate_first_orf( sequence, start_codons=['AUG'], stop_codons=['UAA', 'UAG', 'UGA'], genetic_code={ 'GUC': 'V', 'ACC': 'T', 'GUA': 'V', 'GUG': 'V', 'ACU': 'T', 'AAC': 'N', 'CCU': 'P', 'UGG': 'W', 'AGC': 'S', 'AUC': 'I', 'CAU': 'H', 'AAU': 'N', 'AGU': 'S', 'GUU': 'V', 'CAC': 'H', 'ACG': 'T', 'CCG': 'P', 'CCA': 'P', 'ACA': 'T', 'CCC': 'P', 'UGU': 'C', 'GGU': 'G', 'UCU': 'S', 'GCG': 'A', 'UGC': 'C', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 'UCG': 'S', 'AGG': 'R', 'GGG': 'G', 'UCC': 'S', 'UCA': 'S', 'UAA': '*', 'GGA': 'G', 'UAC': 'Y', 'GAC': 'D', 'UAG': '*', 'AUA': 'I', 'GCA': 'A', 'CUU': 'L', 'GGC': 'G', 'AUG': 'M', 'CUG': 'L', 'GAG': 'E', 'CUC': 'L', 'AGA': 'R', 'CUA': 'L', 'GCC': 'A', 'AAA': 'K', 'AAG': 'K', 'CAA': 'Q', 'UUU': 'F', 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'GCU': 'A', 'GAA': 'E', 'AUU': 'I', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F' }, ): """ Translate the first open-reading frame in the DNA or RNA `sequence`. An open-reading frame (ORF) is the part of an RNA sequence that is translated into a peptide. It must begin with a start codon, followed by zero or more codons (triplets of nucleotides), and end with a stop codon. If there are no ORFs in the sequence, an empty string is returned. Parameters ---------- sequence : str A string representing a DNA or RNA sequence (upper or lower-case) start_codons : list of strings All possible start codons. Each codon must be a string of 3 RNA bases, upper or lower-case. stop_codons : list of strings All possible stop codons. Each codon must be a string of 3 RNA bases, upper or lower-case. genetic_code : dict A dictionary of the genetic code, mapping codons to single-letter abbreviations for amino acids. Returns ------- str An uppercase string of the amino acid encoded by the first ORF found in the `sequence` that starts with any one of the `start_codons` and ends with any one of the `stop_codons`. If no ORF is found an empty string is returned. Examples -------- When the whole RNA sequence is an ORF: >>> translate_first_orf('AUGGUAUAA', ['AUG'], ['UAA']) 'MV' When the whole DNA sequence is an ORF: >>> translate_first_orf('ATGGTATAA', ['AUG'], ['UAA']) 'MV' When there is no ORF: >>> translate_first_orf('CUGGUAUAA', ['AUG'], ['UAA']) '' When there are bases before and after ORF: >>> translate_first_orf('CCAUGGUAUAACC', ['AUG'], ['UAA']) 'MV' """ ## import the find orf script from the find_orf.py script then use the translate method to convert this ## into an amino acid sequence orf = find_orf.find_first_orf(sequence, start_codons=start_codons, stop_codons=stop_codons) aa_seq = translate.translate_sequence(orf, genetic_code) return aa_seq
'CAA': 'Q', 'UUU': 'F', 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'GCU': 'A', 'GAA': 'E', 'AUU': 'I', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F' } if args.path: sequence = find_orf.parse_sequence_from_path(args.sequence) else: sequence = args.sequence orf = find_orf.find_first_orf(sequence, start_codons=args.start_codons, stop_codons=args.stop_codons) translation = translate.translate_sequence(orf, genetic_code) print('Your (specified) start codon(s): ' + str(args.start_codons)) print('Your (specified) stop codon(s): ' + str(args.stop_codons)) print('Your sequence was: ' + sequence) print('The open reading frame found was: ' + orf) print('The translation of the reading frame is:\n' + translation)
def translate_first_orf( start_codons=['AUG'], stop_codons=['UAA', 'UGA', 'UAG'], genetic_code={ 'GUC': 'V', 'ACC': 'T', 'GUA': 'V', 'GUG': 'V', 'ACU': 'T', 'AAC': 'N', 'CCU': 'P', 'UGG': 'W', 'AGC': 'S', 'AUC': 'I', 'CAU': 'H', 'AAU': 'N', 'AGU': 'S', 'GUU': 'V', 'CAC': 'H', 'ACG': 'T', 'CCG': 'P', 'CCA': 'P', 'ACA': 'T', 'CCC': 'P', 'UGU': 'C', 'GGU': 'G', 'UCU': 'S', 'GCG': 'A', 'UGC': 'C', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 'UCG': 'S', 'AGG': 'R', 'GGG': 'G', 'UCC': 'S', 'UCA': 'S', 'UAA': '*', 'GGA': 'G', 'UAC': 'Y', 'GAC': 'D', 'UAG': '*', 'AUA': 'I', 'GCA': 'A', 'CUU': 'L', 'GGC': 'G', 'AUG': 'M', 'CUG': 'L', 'GAG': 'E', 'CUC': 'L', 'AGA': 'R', 'CUA': 'L', 'GCC': 'A', 'AAA': 'K', 'AAG': 'K', 'CAA': 'Q', 'UUU': 'F', 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'GCU': 'A', 'GAA': 'E', 'AUU': 'I', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F' }, ): """ Translate first open reading frame in a sequence of RNA. Parameters: ------------------- sequence : a string containing the RNA bases composing the open reading frame to be translated start_codons : list of strings containing start codons stop_codons : list of strings containing stop codons genetic_code : a dictionary containing the genetic code that provides the single letter amino acid abbreviation for each triplet of bases composing a codon """ orf = find_orf.find_first_orf(sequence, start_codons=start_codons, stop_codons=stop_codons) amino_acid_seq = translate.translate_sequence(orf, genetic_code) return amino_acid_seq