index += 1 entry += line elif line.find('ORIGIN') >= 0: #found sequence start, set the flag on and parses the last entry is_seq = True genes.append(parse_entry(entry)) elif is_seq == True: #if flag is true keep going, usually sequences are store at the end of the file line = line.split() sequence.append(line) else: #this is an entry so append line entry += line str_seq = '' #make the sequence a string for i in sequence: str_seq += ''.join(i[1:]).upper() for i in genes: if len(i.gi_id) > 2: print i.id, i.start, i.end output = open(i.gi_id + '.DNA.fasta', 'w') output.write('>' + i.gi_id + '\t' + i.id + '\n') # if this is a complement, print both 5'-3' and reverse complement sequences if i.complement == True: output.write(fasta.format_output(fasta.invert(str_seq[int(i.start)-1:int(i.end)]), 80) + '\n') else: if not i.start.find('join') >= 0: output.write(fasta.format_output(str_seq[int(i.start)-1:int(i.end)], 80))
# Split the GenBank file into one text entry per ' gene ' section, parse each
# entry with parse_entry(), and write one '<gi>.fasta' file per parsed record.
#
# NOTE(review): this section arrived with its indentation collapsed. The
# nesting below is reconstructed: `index += 1` must sit outside the
# `index >= 1` test (otherwise index could never leave 0), but whether
# `entry = ''` belongs inside that test and whether the trailing print of
# i.id belongs inside the `len(i.gi) > 2` branch should be confirmed against
# the upstream file.
proteins = []
index = 0
entry = ''
for line in gbfile:
    if ' gene ' in line:
        if index >= 1:
            # a previous gene section is complete: parse it and append it to
            # the list, then start collecting the next one
            proteins.append(parse_entry(entry))
            entry = ''
        index += 1
        entry += line
    elif 'ORIGIN' in line:
        # found the DNA sequence, we can stop now
        break
    else:
        entry += line

# parses the last entry after leaving the loop
proteins.append(parse_entry(entry))

# output: records whose gi has 2 characters or fewer are skipped
for i in proteins:
    if len(i.gi) > 2:
        # single-argument print emits the same text under Python 2 and 3
        print('%s %s' % (i.gi, i.id))
        # `with` guarantees the file is flushed and closed even if a write
        # fails; the handle was previously never closed
        with open(i.gi + '.fasta', 'w') as output:
            output.write('>' + i.gi + '\t' + i.id + '\n')
            # strip the double quotes from the sequence text before writing
            i.sequence = i.sequence.replace('\"', '')
            # 80 is passed straight to fasta.format_output (presumably the
            # line width -- confirm in the fasta module)
            output.write(fasta.format_output(i.sequence, 80))
        print(i.id)
genes.append(parse_entry(entry)) elif is_seq == True: #if flag is true keep going, usually sequences are store at the end of the file line = line.split() sequence.append(line) else: #this is an entry so append line entry += line str_seq = '' #make the sequence a string for i in sequence: str_seq += ''.join(i[1:]).upper() for i in genes: if len(i.gi_id) > 2: print i.id, i.start, i.end output = open(i.gi_id + '.DNA.fasta', 'w') output.write('>' + i.gi_id + '\t' + i.id + '\n') # if this is a complement, print both 5'-3' and reverse complement sequences if i.complement == True: output.write( fasta.format_output( fasta.invert(str_seq[int(i.start) - 1:int(i.end)]), 80) + '\n') else: if not i.start.find('join') >= 0: output.write( fasta.format_output(str_seq[int(i.start) - 1:int(i.end)], 80))
#!/usr/bin/env python
"""Translate every record of a FASTA file and print the result.

Usage: pass the path of the FASTA file as the first command-line argument.

For each record returned by fasta.read_fasta() the script prints the record
name, then the output of dnatranslate.translate_dna() on its sequence,
formatted by fasta.format_output(protein, 60).
"""
#import the project modules and sys
import dnatranslate
import fasta
import sys

# fail with a readable message instead of an IndexError traceback when the
# input path is missing
if len(sys.argv) < 2:
    sys.exit('usage: %s FASTA_FILE' % sys.argv[0])

#read the fasta file: open the file, read the contents and send them to the
#fasta reading function; `with` closes the handle, which was previously leaked
with open(sys.argv[1], 'r') as fasta_file:
    dna = fasta.read_fasta(fasta_file.readlines())

for item in dna:
    #translate the DNA
    protein = dnatranslate.translate_dna(item.sequence)
    # single-argument print behaves identically under Python 2 and 3
    print(item.name)
    #format and print the protein
    print(fasta.format_output(protein, 60))