def translate_6_frame(in_fasta, out_fasta):
    """Write every translation frame of every record in a FASTA file.

    Each (header, sequence) pair produced by ``FastaReader(in_fasta)`` is
    passed to ``sixFrameTranslation``; every (frame, translation) item of the
    returned mapping becomes one record in ``out_fasta`` whose header is the
    source header followed by `` _frame_<frame>``.

    Args:
        in_fasta: Path of the FASTA file to read.
        out_fasta: Path of the FASTA file to create (overwritten if present).

    Returns:
        ``out_fasta``, unchanged, for convenient chaining.
    """
    with open(out_fasta, 'w') as handle:
        for header, nucleotides in FastaReader(in_fasta):
            frames = sixFrameTranslation(nucleotides)
            for frame_id, protein in frames.items():
                title = ">" + header + " _frame_" + str(frame_id)
                handle.write(title + "\n")
                handle.write(protein + "\n")
    return out_fasta
def pull_out_long_ORFs(bad_file, len_cutoff, outputdir):
    """Write the long ORFs found in each record of ``bad_file`` to a FASTA file.

    Every (header, sequence) pair read from ``bad_file`` is translated with
    ``sixFrameTranslation`` and the result is handed to ``get_long_ORFS``
    together with ``len_cutoff``. Each item it returns is written as one
    FASTA record: the header is the source header immediately followed by
    ``item[0]`` and the body is ``item[1]``. A one-line summary of the counts
    is printed to stdout.

    Args:
        bad_file: Path of the FASTA file to read.
        len_cutoff: Threshold forwarded unchanged to ``get_long_ORFS``
            (presumably a minimum ORF length -- confirm against that helper).
        outputdir: Prefix of the output path. It is concatenated directly
            with the file name, so it must already end with a path separator
            when it names a directory.

    Returns:
        Path of the file written: ``outputdir`` + the base name of
        ``bad_file`` without its extension + ``"_TranslongORFS.fa"``.
    """
    stem = os.path.splitext(os.path.basename(bad_file))[0]
    out_file = outputdir + stem + "_TranslongORFS.fa"

    num_seqs = 0
    num_orfs = 0
    with open(out_file, "w") as outfile:
        for h, s in FastaReader(bad_file):
            num_seqs += 1
            translation = sixFrameTranslation(s)
            for o in get_long_ORFS(translation, len_cutoff):
                num_orfs += 1
                outfile.write(">" + h + o[0] + "\n")
                outfile.write(o[1] + "\n")

    # A single parenthesised string prints identically under Python 2 and 3,
    # unlike the former Python 2-only print statement.
    print(str(num_seqs) + " sequences translated into " + str(num_orfs) + " ORFs")

    return out_file
# Example #3
0
def pull_out_long_ORFs(bad_file, len_cutoff, outputdir):
    """Extract long ORFs from every record of ``bad_file`` into a FASTA file.

    Each (header, sequence) pair yielded by ``FastaReader(bad_file)`` is run
    through ``sixFrameTranslation``; the translation and ``len_cutoff`` are
    then given to ``get_long_ORFS``. For every returned item a FASTA record
    is written whose header is the source header directly followed by
    ``item[0]`` and whose sequence line is ``item[1]``. The number of input
    sequences and of ORFs written is reported on stdout.

    Args:
        bad_file: Path of the FASTA file to read.
        len_cutoff: Value passed through to ``get_long_ORFS`` (presumably the
            minimum ORF length to keep -- confirm against that helper).
        outputdir: Output path prefix. It is joined to the file name by plain
            string concatenation, so a directory must be given with its
            trailing path separator.

    Returns:
        Path of the written file: ``outputdir`` + base name of ``bad_file``
        minus its extension + ``"_TranslongORFS.fa"``.
    """
    out_file = (outputdir + os.path.splitext(os.path.basename(bad_file))[0] +
                "_TranslongORFS.fa")

    num_seqs = 0
    num_orfs = 0
    with open(out_file, 'w') as outfile:
        for h, s in FastaReader(bad_file):
            num_seqs += 1
            translation = sixFrameTranslation(s)
            orfs = get_long_ORFS(translation, len_cutoff)
            for o in orfs:
                num_orfs += 1
                outfile.write(">" + h + o[0] + "\n")
                outfile.write(o[1] + "\n")

    # Function-call form with one string argument: same output on Python 2
    # and Python 3, whereas the old print statement was Python 2 only.
    print(str(num_seqs) + " sequences translated into " + str(num_orfs) +
          " ORFs")

    return out_file
def split_easy_from_hard(inputfile, outputdir):
    """Split a FASTA file into cleanly translated and hard-to-translate records.

    For each (header, sequence) pair from ``FastaReader(inputfile)`` the
    mapping returned by ``sixFrameTranslation`` is scanned for the frame
    whose translation contains the fewest ``*`` characters (the first frame
    encountered wins ties). If that frame has at most two, the translation is
    written to the main output file with `` frame_<frame>`` appended to the
    header; otherwise the original, untranslated record is written to the
    companion ``_BadSeqs`` file. A summary line is printed to stdout.

    Args:
        inputfile: Path of the FASTA file to read.
        outputdir: Output path prefix. It is concatenated directly with the
            file name, so a directory must include its trailing separator.

    Returns:
        Tuple ``(output_file, bad_file)`` where ``output_file`` is
        ``outputdir`` + base name of ``inputfile`` without extension +
        ``"_translatedL2stops.fa"`` and ``bad_file`` is ``output_file`` +
        ``"_BadSeqs"``.
    """
    stem = os.path.splitext(os.path.basename(inputfile))[0]
    output_file = outputdir + stem + "_translatedL2stops.fa"
    bad_file = output_file + "_BadSeqs"

    seqCount = 0
    badSeqs = 0

    with open(bad_file, "w") as badfile:
        with open(output_file, "w") as outfile:
            for h, s in FastaReader(inputfile):
                seqCount += 1
                translation = sixFrameTranslation(s)

                # Track the frame with the fewest '*' characters; None means
                # no frame has been seen yet (e.g. an empty translation).
                best = None
                stops = None
                for frame in translation:
                    st = translation[frame].count("*")
                    if stops is None or st < stops:
                        best = frame
                        stops = st

                if stops is not None and stops <= 2:
                    outfile.write(">" + h + " frame_" + str(best) + "\n")
                    outfile.write(translation[best] + "\n")
                else:
                    badSeqs += 1
                    badfile.write(">" + h + "\n")
                    badfile.write(s + "\n")

    # Guard against an input with no records, which previously raised
    # ZeroDivisionError after both output files had been created.
    percent_bad = (100.0 * badSeqs) / seqCount if seqCount else 0.0
    print(
        str(percent_bad)
        + " percent or "
        + str(badSeqs)
        + " out of "
        + str(seqCount)
        + " were not translated."
    )

    return output_file, bad_file
# Example #5
0
def split_easy_from_hard(inputfile, outputdir):
    """Separate records that translate with few ``*`` from those that do not.

    Every (header, sequence) pair read via ``FastaReader(inputfile)`` is
    translated with ``sixFrameTranslation``. The frame whose translation has
    the fewest ``*`` characters is selected (earliest frame in iteration
    order on ties). When that count is two or fewer, the translation goes to
    the main output file under the header plus `` frame_<frame>``; otherwise
    the raw record is copied to the ``_BadSeqs`` file. The share of records
    that were not translated is printed to stdout.

    Args:
        inputfile: Path of the FASTA file to read.
        outputdir: Output path prefix, joined to the file name by plain
            concatenation (include the trailing separator for a directory).

    Returns:
        Tuple ``(output_file, bad_file)``: ``output_file`` is ``outputdir`` +
        base name of ``inputfile`` without extension +
        ``"_translatedL2stops.fa"``; ``bad_file`` is ``output_file`` +
        ``"_BadSeqs"``.
    """
    seqCount = 0
    badSeqs = 0

    output_file = (outputdir +
                   os.path.splitext(os.path.basename(inputfile))[0] +
                   "_translatedL2stops.fa")
    bad_file = output_file + "_BadSeqs"

    with open(bad_file, 'w') as badfile:
        with open(output_file, 'w') as outfile:

            for h, s in FastaReader(inputfile):
                translation = sixFrameTranslation(s)

                # None marks "no frame examined yet", so an empty translation
                # falls through to the bad-sequence branch.
                best = None
                stops = None
                for frame in translation:
                    st = translation[frame].count('*')
                    if stops is None or st < stops:
                        best = frame
                        stops = st

                if stops is not None and stops <= 2:
                    outfile.write(">" + h + " frame_" + str(best) + "\n")
                    outfile.write(translation[best] + "\n")
                else:
                    badSeqs += 1
                    badfile.write(">" + h + "\n")
                    badfile.write(s + "\n")

                seqCount += 1

    # An input without any records used to raise ZeroDivisionError here;
    # report 0.0 percent instead.
    percent_bad = (100.0 * badSeqs) / seqCount if seqCount else 0.0
    print(
        str(percent_bad) + " percent or " + str(badSeqs) +
        " out of " + str(seqCount) + " were not translated.")

    return output_file, bad_file