def __main__():
    """Write the ORF sequences listed in a prediction file to a FASTA file.

    Reads three command-line arguments:

    1. prediction file: ``>`` header lines naming a sequence, each followed
       by fixed-width ORF lines (name in columns 0-7, start in columns 8-16,
       end in columns 18-25) -- presumably Glimmer output, going by the
       variable names; confirm against the producer of the file.
    2. FASTA file holding the sequences the headers refer to.
    3. output path for the extracted ORFs (written as FASTA).

    Coordinates are treated as 1-based and inclusive. An ORF whose start is
    greater than its end is taken from the reverse strand: the slice between
    end and start is reverse-complemented. Each header is echoed to stdout
    as it is reached.

    Exits with status 1 if fewer than three arguments are given. Raises
    KeyError if a header has no FASTA record with an identical description,
    and ValueError if the coordinate columns are not integers.
    """
    if len(sys.argv) < 4:
        sys.stderr.write("Missing input values.\n")
        # Non-zero status so shells and pipelines can detect the usage error.
        sys.exit(1)

    glimmer_path, sequence_path, output_path = sys.argv[1:4]

    # Index the FASTA records by their full description line, because that
    # is what the '>' headers of the prediction file are looked up by.
    sequences = {}
    with open(sequence_path) as sequence:
        for fasta_entry in Bio.SeqIO.parse(sequence, "fasta"):
            sequences[fasta_entry.description] = fasta_entry

    with open(glimmer_path, "r") as glimmerfile:
        with open(output_path, "w") as orf2seq:
            # Record the ORF lines currently belong to; None until the first
            # header, so a stray ORF line cannot silently pick up whatever
            # FASTA record happened to be read last.
            entry = None
            for line in glimmerfile:
                if not line.strip():
                    continue  # tolerate blank lines (e.g. at end of file)

                if line.startswith('>'):
                    header = line[1:].strip()
                    print(header)
                    entry = sequences[header]
                    continue

                orf_name = line[0:8]
                orf_start = int(line[8:17])
                orf_end = int(line[18:26])

                if entry is None:
                    sys.stderr.write(
                        "Error: ORF %s appears before any '>' header line; "
                        "skipping it.\n" % orf_name.strip())
                    continue

                if orf_start <= orf_end:
                    orf_seq = entry.seq[orf_start - 1:orf_end]
                else:
                    # start > end marks a reverse-strand ORF.
                    orf_seq = entry.seq[orf_end - 1:orf_start].reverse_complement()

                # `record` is defined elsewhere in the file (presumably
                # Biopython's SeqRecord -- confirm). The extra "\n" keeps the
                # original blank line after every written record.
                orf2seq.write(
                    record(orf_seq, id=orf_name,
                           description=entry.description).format("fasta") + "\n")


# Example #2
# 0


def glimmer2sequence(sequence_path,
                     glimmer_path,
                     output_path,
                     to_protein=False,
                     translation_table=11):
    """Write the sequence of every ORF in a glimmer prediction file as FASTA.

    The prediction file consists of ``>`` header lines, each naming a
    sequence by its full FASTA description, followed by whitespace-separated
    ORF lines whose first three columns are name, start and end.
    Coordinates are treated as 1-based and inclusive; an ORF whose start is
    greater than its end is taken from the reverse strand (the slice between
    end and start is reverse-complemented).

    Args:
        sequence_path: path of the FASTA file with the source sequences.
        glimmer_path: path of the glimmer prediction file.
        output_path: path of the FASTA file to write (overwritten).
        to_protein: if true, each ORF is translated (``to_stop=True``)
            before it is written.
        translation_table: genetic code table handed to ``translate``.

    Raises:
        KeyError: a header names a sequence that is not in the FASTA file.

    Malformed ORF lines are reported on stderr and skipped; blank lines are
    ignored.
    """
    # Index the FASTA records by their full description line, because that
    # is what the '>' headers of the prediction file are looked up by.
    sequences = {}
    with open(sequence_path) as sequence_file:
        for fasta_entry in Bio.SeqIO.parse(sequence_file, "fasta"):
            sequences[fasta_entry.description] = fasta_entry

    with open(glimmer_path, "r") as glimmerfile:
        with open(output_path, "w") as orf2seq:
            # Record the ORF lines currently belong to; None until the first
            # header, so a stray ORF line cannot silently pick up whatever
            # FASTA record happened to be read last.
            entry = None
            for line in glimmerfile:
                if line.startswith('>'):
                    entry = sequences[line[1:].strip()]
                    continue

                columns = line.split()
                if not columns:
                    continue  # blank line
                if len(columns) < 3:
                    # Checked up front so the error report below can safely
                    # index columns[1] and columns[2].
                    sys.stderr.write(
                        "Error: Expected at least 3 columns but found %d in "
                        "line %r. Is the input really a glimmer prediction "
                        "file?\n" % (len(columns), line.strip()))
                    continue

                try:
                    orf_start = int(columns[1])
                    orf_end = int(columns[2])
                except ValueError:
                    sys.stderr.write(
                        "Error: Failed to convert %s or %s to an integer. Is the input really a glimmer prediction file?\n"
                        % (columns[1], columns[2]))
                    continue
                orf_name = columns[0]

                if entry is None:
                    sys.stderr.write(
                        "Error: ORF %s appears before any '>' header line; "
                        "skipping it.\n" % orf_name)
                    continue

                if orf_start <= orf_end:
                    sequence = entry.seq[orf_start - 1:orf_end]
                else:
                    # start > end marks a reverse-strand ORF.
                    sequence = entry.seq[orf_end - 1:orf_start].reverse_complement()
                if to_protein:
                    sequence = sequence.translate(to_stop=True,
                                                  table=translation_table)

                # `record` is defined elsewhere in the file (presumably
                # Biopython's SeqRecord -- confirm). The extra "\n" keeps the
                # original blank line after every written record.
                orf2seq.write(
                    record(sequence, id=orf_name,
                           description=entry.description).format("fasta") + "\n")
def glimmer2sequence(sequence_path, glimmer_path, output_path, to_protein=False, translation_table=11):
    """Extract the ORFs listed in a glimmer prediction file into a FASTA file.

    Every ``>`` header line of the prediction file selects the FASTA record
    whose description equals the header text. The whitespace-separated lines
    that follow give an ORF name, start and end in their first three
    columns. Start and end are treated as 1-based, inclusive positions; when
    start is greater than end the ORF is read from the reverse strand by
    reverse-complementing the slice between end and start.

    Args:
        sequence_path: FASTA file containing the source sequences.
        glimmer_path: glimmer prediction file to read.
        output_path: FASTA file to create or overwrite with the ORFs.
        to_protein: translate each ORF (``to_stop=True``) before writing.
        translation_table: genetic code table passed to ``translate``.

    Raises:
        KeyError: if a header has no FASTA record with the same description.

    Lines that cannot be parsed are reported on stderr and skipped, and
    blank lines are ignored.
    """
    # Key the records by description: the prediction headers carry the whole
    # description line, not just the record id.
    sequences = {}
    with open(sequence_path) as fasta_handle:
        for fasta_record in Bio.SeqIO.parse(fasta_handle, "fasta"):
            sequences[fasta_record.description] = fasta_record

    with open(glimmer_path, "r") as glimmerfile:
        with open(output_path, "w") as orf2seq:
            # Stays None until a header has been seen, so an ORF line ahead
            # of the first header is reported instead of being cut from an
            # unrelated record.
            entry = None
            for line in glimmerfile:
                if line.startswith('>'):
                    entry = sequences[line[1:].strip()]
                    continue

                columns = line.split()
                if not columns:
                    continue  # blank line

                if len(columns) < 3:
                    # Guard first: the conversion error message below reads
                    # columns[1] and columns[2].
                    sys.stderr.write("Error: Expected at least 3 columns but found %d in line %r. Is the input really a glimmer prediction file?\n" % (len(columns), line.strip()))
                    continue

                try:
                    orf_start = int(columns[1])
                    orf_end = int(columns[2])
                except ValueError:
                    sys.stderr.write("Error: Failed to convert %s or %s to an integer. Is the input really a glimmer prediction file?\n" % (columns[1], columns[2]))
                    continue
                orf_name = columns[0]

                if entry is None:
                    sys.stderr.write("Error: ORF %s appears before any '>' header line; skipping it.\n" % orf_name)
                    continue

                if orf_start <= orf_end:
                    sequence = entry.seq[orf_start - 1:orf_end]
                else:
                    # Reverse-strand ORF: start lies after end.
                    sequence = entry.seq[orf_end - 1:orf_start].reverse_complement()
                if to_protein:
                    sequence = sequence.translate(to_stop=True, table=translation_table)

                # `record` comes from elsewhere in the file (presumably
                # Biopython's SeqRecord -- confirm). The trailing "\n"
                # preserves the blank line the original wrote after each
                # record.
                new_line = record(sequence, id=orf_name, description=entry.description).format("fasta") + "\n"
                orf2seq.write(new_line)