def translate_6_frame(in_fasta, out_fasta):
    """Write all translations of every record in ``in_fasta`` to ``out_fasta``.

    Each ``(header, sequence)`` pair yielded by ``FastaReader(in_fasta)`` is
    passed to ``sixFrameTranslation``; for every frame key in the returned
    mapping one FASTA record is written, with ``" _frame_<key>"`` appended to
    the original header.

    Args:
        in_fasta: Passed unchanged to ``FastaReader`` (presumably a FASTA
            file path -- confirm against that helper).
        out_fasta: Path of the file to create or overwrite.

    Returns:
        ``out_fasta``, unchanged.
    """
    with open(out_fasta, 'w') as handle:
        for header, sequence in FastaReader(in_fasta):
            translations = sixFrameTranslation(sequence)
            for frame_id in translations:
                header_line = ">" + header + " _frame_" + str(frame_id) + "\n"
                handle.write(header_line)
                handle.write(translations[frame_id] + "\n")
    return out_fasta
def pull_out_long_ORFs(bad_file, len_cutoff, outputdir):
    """Translate every record of ``bad_file`` and save the ORFs that are kept.

    Each record yielded by ``FastaReader(bad_file)`` is run through
    ``sixFrameTranslation`` and the result is handed, together with
    ``len_cutoff``, to ``get_long_ORFS``.  Every item returned is written as
    one FASTA record: the header is the source header immediately followed by
    ``item[0]`` and the sequence line is ``item[1]``.  A one-line summary of
    the record and ORF counts is printed to stdout.

    Args:
        bad_file: Path of the FASTA file to read.
        len_cutoff: Forwarded unchanged to ``get_long_ORFS`` (presumably the
            minimum ORF length -- confirm against that helper).
        outputdir: String prefix for the output path.  It is concatenated
            directly with the file name (no separator is inserted), so it
            must already end with a path separator when it names a directory.

    Returns:
        Path of the written file: ``outputdir`` + base name of ``bad_file``
        without its extension + ``"_TranslongORFS.fa"``.
    """
    # NOTE(review): this name is defined a second time further down in the
    # file; at import time the later definition replaces this one.
    base_name = os.path.splitext(os.path.basename(bad_file))[0]
    out_file = outputdir + base_name + "_TranslongORFS.fa"

    num_seqs = 0
    num_orfs = 0
    with open(out_file, "w") as outfile:
        for h, s in FastaReader(bad_file):
            num_seqs += 1
            translation = sixFrameTranslation(s)
            for o in get_long_ORFS(translation, len_cutoff):
                num_orfs += 1
                outfile.write(">" + h + o[0] + "\n")
                outfile.write(o[1] + "\n")

    # Single-argument call form: prints the same text under Python 2 and is
    # valid syntax under Python 3 (the bare print statement is not).
    print(str(num_seqs) + " sequences translated into " + str(num_orfs) + " ORFs")
    return out_file
def pull_out_long_ORFs(bad_file, len_cutoff, outputdir):
    """Translate every record of ``bad_file`` and save the ORFs that are kept.

    Each record yielded by ``FastaReader(bad_file)`` is run through
    ``sixFrameTranslation`` and the result is handed, together with
    ``len_cutoff``, to ``get_long_ORFS``.  Every item returned is written as
    one FASTA record: the header is the source header immediately followed by
    ``item[0]`` and the sequence line is ``item[1]``.  A one-line summary of
    the record and ORF counts is printed to stdout.

    Args:
        bad_file: Path of the FASTA file to read.
        len_cutoff: Forwarded unchanged to ``get_long_ORFS`` (presumably the
            minimum ORF length -- confirm against that helper).
        outputdir: String prefix for the output path.  It is concatenated
            directly with the file name (no separator is inserted), so it
            must already end with a path separator when it names a directory.

    Returns:
        Path of the written file: ``outputdir`` + base name of ``bad_file``
        without its extension + ``"_TranslongORFS.fa"``.
    """
    # NOTE(review): an earlier definition with this same name appears above
    # in the file; this later one is the definition that remains bound.
    stem = os.path.splitext(os.path.basename(bad_file))[0]
    out_file = outputdir + stem + "_TranslongORFS.fa"

    num_seqs = 0
    num_orfs = 0
    with open(out_file, 'w') as outfile:
        for h, s in FastaReader(bad_file):
            num_seqs += 1
            orfs = get_long_ORFS(sixFrameTranslation(s), len_cutoff)
            for o in orfs:
                num_orfs += 1
                outfile.write(">" + h + o[0] + "\n")
                outfile.write(o[1] + "\n")

    # Single-argument call form: prints the same text under Python 2 and is
    # valid syntax under Python 3 (the bare print statement is not).
    print(str(num_seqs) + " sequences translated into " + str(num_orfs) + " ORFs")
    return out_file
def split_easy_from_hard(inputfile, outputdir):
    """Split records by whether some translation frame has at most two ``*``.

    For every record yielded by ``FastaReader(inputfile)`` the mapping
    returned by ``sixFrameTranslation`` is scanned for the frame whose
    translation contains the fewest ``"*"`` characters (presumably stop
    codons); on a tie the first frame encountered wins.

    * If that frame has two or fewer ``"*"``, its translation is written to
      the main output file with ``" frame_<frame>"`` appended to the header.
    * Otherwise the original, untranslated sequence is written to the
      ``"_BadSeqs"`` file under its original header.

    A one-line summary of how many records went to the bad file is printed.

    Args:
        inputfile: Path of the FASTA file to read.
        outputdir: String prefix for the output paths.  It is concatenated
            directly with the file name (no separator is inserted), so it
            must already end with a path separator when it names a directory.

    Returns:
        Tuple ``(output_file, bad_file)`` where ``output_file`` is
        ``outputdir`` + base name of ``inputfile`` without its extension +
        ``"_translatedL2stops.fa"`` and ``bad_file`` is
        ``output_file + "_BadSeqs"``.
    """
    # NOTE(review): this name is defined a second time further down in the
    # file; at import time the later definition replaces this one.
    base_name = os.path.splitext(os.path.basename(inputfile))[0]
    output_file = outputdir + base_name + "_translatedL2stops.fa"
    bad_file = output_file + "_BadSeqs"

    seq_count = 0
    bad_seqs = 0
    with open(bad_file, "w") as badfile, open(output_file, "w") as outfile:
        for h, s in FastaReader(inputfile):
            translation = sixFrameTranslation(s)

            # Reset per record so a frame chosen for a previous record can
            # never leak into this one.
            best = None
            stops = 9999
            for frame in translation:
                st = translation[frame].count("*")
                if st < stops:  # strict '<' keeps the first frame on ties
                    best = frame
                    stops = st

            if stops <= 2:
                outfile.write(">" + h + " frame_" + str(best) + "\n")
                outfile.write(translation[best] + "\n")
            else:
                bad_seqs += 1
                badfile.write(">" + h + "\n")
                badfile.write(s + "\n")
            seq_count += 1

    # An input with no records would otherwise divide by zero here.
    if seq_count:
        percent_bad = (100.0 * bad_seqs) / seq_count
    else:
        percent_bad = 0.0

    # Message text is kept exactly as before (including the missing space
    # before "percent") in case anything parses this output.
    print(
        str(percent_bad)
        + "percent or "
        + str(bad_seqs)
        + " out of "
        + str(seq_count)
        + " were not translated."
    )
    return output_file, bad_file
def split_easy_from_hard(inputfile, outputdir):
    """Split records by whether some translation frame has at most two ``*``.

    For every record yielded by ``FastaReader(inputfile)`` the mapping
    returned by ``sixFrameTranslation`` is scanned for the frame whose
    translation contains the fewest ``'*'`` characters (presumably stop
    codons); on a tie the first frame encountered wins.

    * If that frame has two or fewer ``'*'``, its translation is written to
      the main output file with ``" frame_<frame>"`` appended to the header.
    * Otherwise the original, untranslated sequence is written to the
      ``"_BadSeqs"`` file under its original header.

    A one-line summary of how many records went to the bad file is printed.

    Args:
        inputfile: Path of the FASTA file to read.
        outputdir: String prefix for the output paths.  It is concatenated
            directly with the file name (no separator is inserted), so it
            must already end with a path separator when it names a directory.

    Returns:
        Tuple ``(output_file, bad_file)`` where ``output_file`` is
        ``outputdir`` + base name of ``inputfile`` without its extension +
        ``"_translatedL2stops.fa"`` and ``bad_file`` is
        ``output_file + "_BadSeqs"``.
    """
    # NOTE(review): an earlier definition with this same name appears above
    # in the file; this later one is the definition that remains bound.
    stem = os.path.splitext(os.path.basename(inputfile))[0]
    output_file = outputdir + stem + "_translatedL2stops.fa"
    bad_file = output_file + "_BadSeqs"

    seq_count = 0
    bad_seqs = 0
    with open(bad_file, 'w') as badfile, open(output_file, 'w') as outfile:
        for h, s in FastaReader(inputfile):
            translation = sixFrameTranslation(s)

            # Reset per record so a frame chosen for a previous record can
            # never leak into this one.
            best = None
            stops = 9999
            for frame in translation:
                st = translation[frame].count('*')
                if st < stops:  # strict '<' keeps the first frame on ties
                    best = frame
                    stops = st

            if stops <= 2:
                outfile.write(">" + h + " frame_" + str(best) + "\n")
                outfile.write(translation[best] + "\n")
            else:
                bad_seqs += 1
                badfile.write(">" + h + "\n")
                badfile.write(s + "\n")
            seq_count += 1

    # An input with no records would otherwise divide by zero here.
    percent_bad = (100.0 * bad_seqs) / seq_count if seq_count else 0.0

    # Message text is kept exactly as before (including the missing space
    # before "percent") in case anything parses this output.
    print(
        str(percent_bad) + "percent or " + str(bad_seqs) + " out of " +
        str(seq_count) + " were not translated.")
    return output_file, bad_file