Example #1
def compute_fasta_stats(formats, input_file, seqtype, priority):
    MIN_LENGTH = 'MIN_LENGTH'
    MAX_LENGTH = 'MAX_LENGTH'
    NUMSEQ = 'NUMSEQ'
    TOTAL_LENGTH = 'TOTAL_LENGTH'
    AVG_LENGTH = 'AVG_LENGTH'

    stats = {
        MIN_LENGTH: 0,
        MAX_LENGTH: 0,
        NUMSEQ: 0,
        TOTAL_LENGTH: 0,
        AVG_LENGTH: 0
    }

    """ min length """
    _MAX = 1000000000000
    stats[MAX_LENGTH] = -(_MAX)
    stats[MIN_LENGTH]= _MAX

    fastareader= FastaReader(input_file)

    """ process one fasta sequence at a time """
    lengths_str=""
    for record in fastareader:
        seqname = record.name
        seq = record.sequence
        length = len(seq)
        
        stats[NUMSEQ] += 1
        
        stats[AVG_LENGTH] += length

        if stats[MIN_LENGTH] > length:
            stats[MIN_LENGTH] = length

        if stats[MAX_LENGTH] < length:
            stats[MAX_LENGTH] = length



    
    if stats[NUMSEQ] > 0:
        stats[AVG_LENGTH] = stats[AVG_LENGTH] / stats[NUMSEQ]
    else:
        stats[AVG_LENGTH] = 0



    #     printf("%s\tNumber of sequences in input file BEFORE QC (%s)\t%s\n" %(str(priority), opts.seqtype,  str(stats[NUMSEQ][BEFORE])) )

    #     printf("%s\tNumber of sequences AFTER QC (%s)\t%s\n" %(str(priority + 5), opts.seqtype, str(stats[NUMSEQ][AFTER])))
    printf(formats % (str(priority + 5), str(stats[NUMSEQ])))
    printf("%s\t-min length\t%s\n" % (str(priority + 6), str(stats[MIN_LENGTH])))
    printf("%s\t-avg length\t%s\n" % (str(priority + 7), str(int(stats[AVG_LENGTH]))))
    printf("%s\t-max length\t%s\n" % (str(priority + 8), str(stats[MAX_LENGTH])))
    printf("%s\t-total base pairs (bp)\t%s\n" % (str(priority + 9), str(int(stats[AVG_LENGTH] * stats[NUMSEQ]))))
Example #2
def create_gff_faa(tempfile, gfffile, faafile):
    patt = re.compile(r'>(.*)_(\d+)_(\d+)_([+-])')
    idpatt = re.compile(r'.*_(\d+_\d+)')

    with open(gfffile, 'w') as gffout:
      with open(faafile, 'w') as faaout:
        fastareader = FastaReader(tempfile)
        for fasta in fastareader:
          res = patt.search(fasta.name)
          if res:
              orfname = res.group(1)
              start = res.group(2)
              end = res.group(3)
              strand = res.group(4)

              idres = idpatt.search(orfname)
              orfid = idres.group(1) if idres else ''
              attr = "ID=" + orfid + ";partial=00"

              fields = [orfname, 'FGS+', 'CDS', start, end, '0', strand, "0", attr]

              # write the record only when the header matched; otherwise the
              # coordinates and attributes would be undefined
              fprintf(faaout, '>' + orfname + "\n" + fasta.sequence + "\n")
              fprintf(gffout, '\t'.join(fields) + '\n')
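
A minimal, hypothetical invocation; the temporary file is expected to contain FGS+-style headers of the form >name_start_end_strand, and the output paths are placeholders.

# Sketch: split FGS+ output into a GFF annotation file and an amino-acid FASTA.
create_gff_faa("sample.fgs.tmp", "sample.gff", "sample.faa")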
Example #3
def countNoOfSequencesInFile(file):
    fastareader = FastaReader(file)
    count = 0
    for record in fastareader:
        count += 1
    return count
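
For comparison, the same count can be obtained without FastaReader by counting header lines; the helper below is a sketch (count_fasta_headers is not part of the original code) and assumes a plain, uncompressed FASTA file.

# Count records by counting '>' header lines.
def count_fasta_headers(path):
    with open(path) as fh:
        return sum(1 for line in fh if line.startswith(">"))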
Example #4
def create_splits(outputdir,
                  listfilename,
                  input_filename,
                  maxMBytes,
                  maxSize,
                  splitPrefix='split',
                  splitSuffix=''):
    maxBytes = 1024 * 1024 * maxMBytes
    if splitSuffix:
        suffix = '.' + splitSuffix
    else:
        suffix = ''

    try:
        if path.exists(listfilename):
            listfile = open(listfilename, 'r')
            listfilenames = [x.strip() for x in listfile.readlines()]
            remove_files(outputdir, listfilenames)
            listfile.close()
    except IOError:
        print("Cannot read file " + listfilename + " !")
        sys.exit(0)

    try:
        listfile = open(listfilename, 'w')
    except IOError:
        print("Cannot write to file " + listfilename + " !")
        sys.exit(0)

    fragments = []
    seq_beg_pattern = re.compile(">")
    splitno = 0
    currblocksize = 0
    currblockbyteSize = 0

    fastareader = FastaReader(input_filename)
    # Read sequences from sorted sequence file and write them to block files

    for name in fastareader:
        fragments.append(fastareader.seqname)
        fragments.append(fastareader.sequence)

        if currblocksize >= maxSize - 1 or currblockbyteSize >= maxBytes:
            splitfile = open(
                outputdir + PATHDELIM + splitPrefix + str(splitno) + suffix,
                'w')
            fprintf(splitfile, "%s", '\n'.join(fragments))
            fragments = []
            splitfile.close()
            # Add this block name to the blocklistfile
            fprintf(listfile, "%s\n", splitPrefix + str(splitno) + suffix)
            splitno += 1
            currblocksize = 0
            currblockbyteSize = 0
        else:
            currblocksize += 1
            currblockbyteSize += len(fastareader.sequence)

    if fragments:
        splitfile = open(
            outputdir + PATHDELIM + splitPrefix + str(splitno) + suffix, 'w')
        fprintf(splitfile, "%s", '\n'.join(fragments))
        splitfile.close()
        fragments = []
        fprintf(listfile, "%s\n", splitPrefix + str(splitno) + suffix)
        splitno += 1

    #Add this block name to the blocklistfile
    currblocksize = 0
    currblockbyteSize = 0

    listfile.close()
    return True
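
A hypothetical call to create_splits; the paths and limits are illustrative, and PATHDELIM, fprintf, and remove_files are assumed to come from the project's own helper modules.

# Sketch: split input.fasta into chunks of at most 500 sequences or roughly
# 32 MB each, recording the chunk file names in split.list (names are placeholders).
create_splits("work/splits", "work/splits/split.list", "input.fasta",
              maxMBytes=32, maxSize=500, splitPrefix="split", splitSuffix="fasta")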
Example #5
def main(argv, errorlogger=None, runstatslogger=None):
    global parser
    (opts, args) = parser.parse_args(argv)

    if not valid_arguments(opts, args):
        print(usage)
        sys.exit(0)

    min_length = 0
    #inputfile = open(opts.input_fasta,'r')
    outfile = open(opts.output_fasta, 'w')
    outfilefna = open(opts.output_fna, 'w')
    outfilefaa = open(opts.output_faa, 'w')
    outfilegff = open(opts.output_gff, 'w')

    logfile = open(opts.log_file, 'w')
    lengthsfile = open(opts.lengths_file, 'w')

    if opts.map_file:
        mapfile = open(opts.map_file, 'w')
    else:
        mapfile = None

    sample_name = opts.input_fasta
    # strip any directory prefix and one FASTA-style extension (case-insensitive)
    sample_name = re.sub(r'^.*/', '', sample_name, flags=re.I)
    sample_name = re.sub(r'^.*\\', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.fasta$', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.fna$', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.faa$', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.fas$', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.fa$', '', sample_name, flags=re.I)

    BEFORE = 'BEFORE'
    AFTER = 'AFTER'
    NUMSEQ = "#INFO\tNumber of sequences :"
    NUMSEQ_SHORTER = "@INFO\tNumber of sequences shorter than minimum length of sequences"
    AVG_LENGTH = "@INFO\tAverage length of sequences:"
    MIN_LENGTH = "@INFO\tMinimum length of sequences:"
    MAX_LENGTH = "@INFO\tMaximum length of sequences:"

    _MAX = 1000000000000
    stats = {
        MIN_LENGTH: {
            'BEFORE': _MAX,
            'AFTER': _MAX
        },
        MAX_LENGTH: {
            'BEFORE': 0,
            'AFTER': 0
        },
        NUMSEQ: {
            'BEFORE': 0,
            'AFTER': 0
        },
        NUMSEQ_SHORTER: {
            'BEFORE': 0,
            'AFTER': 0
        },
        AVG_LENGTH: {
            'BEFORE': 0,
            'AFTER': 0
        },
    }

    length_distribution = {}
    length_cumulative_distribution = {}

    for i in range(0, 31):
        length_distribution[i] = 0
        length_cumulative_distribution[i] = 0

    seq_count = 0
    allNames = dict()
    outputStr = ""
    outputLines = []
    fastareader = FastaReader(opts.input_fasta)
    """ process one fasta sequence at a time """
    lengths_str = ""
    for record in fastareader:
        seqname = record.name
        seq = record.sequence
        length = len(seq)

        index = int(len(seq) / 50)
        if index >= 30:
            index = 30

        length_distribution[index] += 1
        if length < stats[MIN_LENGTH][BEFORE]:
            stats[MIN_LENGTH][BEFORE] = length

        if length > stats[MAX_LENGTH][BEFORE]:
            stats[MAX_LENGTH][BEFORE] = length

        if length < min_length:
            stats[NUMSEQ_SHORTER][BEFORE] += 1

        stats[AVG_LENGTH][BEFORE] = stats[AVG_LENGTH][BEFORE] + length

        seqvalue = filter_sequence(seq)

        stats[NUMSEQ][BEFORE] += 1

        seqlen = len(seqvalue)
        if seqlen >= min_length:

            if len(lengths_str) > 100:
                fprintf(lengthsfile, "%s\n", lengths_str)
                lengths_str = str(seqlen)
            else:
                lengths_str += '\t' + str(seqlen)

            stats[NUMSEQ][AFTER] += 1
            stats[AVG_LENGTH][AFTER] = stats[AVG_LENGTH][AFTER] + seqlen
            if mapfile is None:
                fprintf(outfile, "%s\n", seqname)
            else:
                contigID = sample_name + '_' + str(seq_count)
                orfID = sample_name + '_' + str(seq_count) + "_0"

                fprintf(outfile, ">%s\n", contigID)
                fprintf(outfilefna, ">%s\n", orfID)
                fprintf(outfilefaa, ">%s\n", orfID)

                gffString = sample_name + '_' + str(seq_count)
                gffString += "\t" + "AMINO_ACID_SEQ"
                gffString += "\t" + "CDS"
                gffString += "\t" + "0"
                gffString += "\t" + str(3 * seqlen)
                gffString += "\t" + "0"
                gffString += "\t" + "+"
                gffString += "\t" + "0"
                gffString += "\t" + "ID=" + orfID + ";"
                gffString += "locus_tag=" + orfID + ";"
                gffString += "partial=00;"
                gffString += "orf_length=" + str(seqlen) + ";"
                gffString += "contig_length=" + str(3 * seqlen)

                fprintf(outfilegff, "%s\n", gffString)

                key = re.sub(r'^>', '', seqname)
                fprintf(
                    mapfile, "%s\n", sample_name + '_' + str(seq_count) +
                    '\t' + key + '\t' + str(seqlen))
                seq_count += 1

            fprintf(outfile, "%s\n", "DUMMY CONTIGS FOR AMINO ACID SEQUENCES")
            fprintf(outfilefna, "%s\n", "DUMMY ORFS FOR AMINO ACID SEQUENCES")
            fprintf(outfilefaa, "%s\n", seqvalue)

            if seqlen < stats[MIN_LENGTH][AFTER]:
                stats[MIN_LENGTH][AFTER] = seqlen

            if seqlen > stats[MAX_LENGTH][AFTER]:
                stats[MAX_LENGTH][AFTER] = seqlen

    fprintf(lengthsfile, "%s\n", lengths_str)

    if stats[NUMSEQ][BEFORE] > 0:
        stats[AVG_LENGTH][
            BEFORE] = stats[AVG_LENGTH][BEFORE] / stats[NUMSEQ][BEFORE]
    else:
        stats[AVG_LENGTH][BEFORE] = 0
    if stats[NUMSEQ][AFTER] > 0:
        stats[AVG_LENGTH][
            AFTER] = stats[AVG_LENGTH][AFTER] / stats[NUMSEQ][AFTER]
    else:
        stats[AVG_LENGTH][AFTER] = 0

    lengthsfile.close()
    outfile.close()
    outfilefna.close()
    outfilefaa.close()
    outfilegff.close()

    #inputfile.close()
    if mapfile is not None:
        mapfile.close()
    """ min length """
    if stats[MIN_LENGTH][BEFORE] == _MAX:
        stats[MIN_LENGTH][BEFORE] = 0
    if stats[MIN_LENGTH][AFTER] == _MAX:
        stats[MIN_LENGTH][AFTER] = 0

    fprintf(logfile, "@INFO\tBEFORE\tAFTER\n")
    fprintf(
        logfile, "%s\n", NUMSEQ + '\t' + str(stats[NUMSEQ][BEFORE]) + '\t' +
        str(stats[NUMSEQ][AFTER]))
    fprintf(
        logfile, "%s\n",
        NUMSEQ_SHORTER + '\t' + str(stats[NUMSEQ_SHORTER][BEFORE]) + '\t' +
        str(stats[NUMSEQ_SHORTER][AFTER]))
    fprintf(
        logfile, "%s\n", AVG_LENGTH + '\t' + str(stats[AVG_LENGTH][BEFORE]) +
        '\t' + str(stats[AVG_LENGTH][AFTER]))
    fprintf(
        logfile, "%s\n", MIN_LENGTH + '\t' + str(stats[MIN_LENGTH][BEFORE]) +
        '\t' + str(stats[MIN_LENGTH][AFTER]))
    fprintf(
        logfile, "%s\n", MAX_LENGTH + '\t' + str(stats[MAX_LENGTH][BEFORE]) +
        '\t' + str(stats[MAX_LENGTH][AFTER]))
    fprintf(logfile, "@INFO\tLOW\tHIGH\tFREQUENCY\tCUMULATIVE_FREQUENCY\n")
    #    fprintf(logfile, "#   ---\t-----\t--------\t---------\t----------\n");

    i = 30
    length_cumulative_distribution[i] = length_distribution[i]
    i -= 1
    while i >= 0:
        length_cumulative_distribution[i] = length_cumulative_distribution[
            i + 1] + length_distribution[i]
        i -= 1

    for i in range(0, 31):
        fprintf(logfile, "   %s\n", str(i*50) + '\t' + str((i+1)*50) + '\t' +\
                 str(length_distribution[i]) +'\t' + str(length_cumulative_distribution[i]) )

    logfile.close()

    seqtype = 'amino'
    """priority is used to sort the output to print in the right order"""
    priority = 2000

    if runstatslogger is not None:
        runstatslogger.write(
            "%s\tSequences BEFORE Filtering (%s)\t%s\n" %
            (str(priority), seqtype, str(stats[NUMSEQ][BEFORE])))
        runstatslogger.write(
            "%s\tmin length\t%s\n" %
            (str(priority + 1), str(stats[MIN_LENGTH][BEFORE])))
        runstatslogger.write(
            "%s\tavg length\t%s\n" %
            (str(priority + 2), str(int(stats[AVG_LENGTH][BEFORE]))))
        runstatslogger.write(
            "%s\tmax length\t%s\n" %
            (str(priority + 3), str(stats[MAX_LENGTH][BEFORE])))
        runstatslogger.write(
            "%s\ttot length\t%s\n" %
            (str(priority + 4),
             str(int(stats[AVG_LENGTH][BEFORE] * stats[NUMSEQ][BEFORE]))))
        runstatslogger.write(
            "%s\tSequences AFTER Filtering (%s)\t%s\n" %
            (str(priority + 5), seqtype, str(stats[NUMSEQ][AFTER])))
        runstatslogger.write(
            "%s\tmin length\t%s\n" %
            (str(priority + 6), str(stats[MIN_LENGTH][AFTER])))
        runstatslogger.write(
            "%s\tavg length\t%s\n" %
            (str(priority + 7), str(int(stats[AVG_LENGTH][AFTER]))))
        runstatslogger.write(
            "%s\tmax length\t%s\n" %
            (str(priority + 8), str(stats[MAX_LENGTH][AFTER])))
        runstatslogger.write(
            "%s\ttot length\t%s\n" %
            (str(priority + 9),
             str(int(stats[AVG_LENGTH][AFTER] * stats[NUMSEQ][AFTER]))))
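
The seven re.sub calls that derive sample_name can be collapsed into a single helper; the sketch below (sample_basename is not a name from the original code) strips the directory part with os.path.basename and one trailing FASTA-style extension.

import os
import re

# Strip the directory prefix and one FASTA-style extension, case-insensitively.
def sample_basename(path):
    name = os.path.basename(path)
    return re.sub(r'\.(fasta|fna|faa|fas|fa)$', '', name, flags=re.I)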
Example #6
def main(argv, errorlogger=None, runstatslogger=None):
    global parser
    global errorcode

    (opts, args) = parser.parse_args(argv)

    if not valid_arguments(opts, args):
        print(usage)
        sys.exit(0)

    min_length = opts.min_length
    outfile = open(opts.output_fasta + '.tmp', 'w')
    logfile = open(opts.log_file, 'w')
    lengthsfile = open(opts.lengths_file + '.tmp', 'w')

    if opts.map_file:
        mapfile = open(opts.map_file, 'w')
    else:
        mapfile = None

    if opts.seqtype == 'nucleotide':
        errorcode = 1
    else:
        errorcode = 3

    sample_name = opts.input_fasta
    # strip any directory prefix and one FASTA-style extension (case-insensitive)
    sample_name = re.sub(r'^.*/', '', sample_name, flags=re.I)
    sample_name = re.sub(r'^.*\\', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.fasta$', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.fna$', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.faa$', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.fas$', '', sample_name, flags=re.I)
    sample_name = re.sub(r'\.fa$', '', sample_name, flags=re.I)

    BEFORE = 'BEFORE'
    AFTER = 'AFTER'
    NUMSEQ = "#INFO\tNumber of sequences :"
    NUMSEQ_SHORTER = "@INFO\tNumber of sequences shorter than minimum length of sequences"
    AVG_LENGTH = "@INFO\tAverage length of sequences:"
    MIN_LENGTH = "@INFO\tMinimum length of sequences:"
    MAX_LENGTH = "@INFO\tMaximum length of sequences:"

    _MAX = 1000000000000
    stats = {
        MIN_LENGTH: {
            'BEFORE': _MAX,
            'AFTER': _MAX
        },
        MAX_LENGTH: {
            'BEFORE': 0,
            'AFTER': 0
        },
        NUMSEQ: {
            'BEFORE': 0,
            'AFTER': 0
        },
        NUMSEQ_SHORTER: {
            'BEFORE': 0,
            'AFTER': 0
        },
        AVG_LENGTH: {
            'BEFORE': 0,
            'AFTER': 0
        },
    }

    length_distribution = {}
    length_cumulative_distribution = {}

    for i in range(0, 31):
        length_distribution[i] = 0
        length_cumulative_distribution[i] = 0

    seq_count = 0
    allNames = dict()
    outputStr = ""
    outputLines = []
    print(opts.input_fasta)
    fastareader = FastaReader(opts.input_fasta)
    """ process one fasta sequence at a time """
    lengths_str = ""
    for record in fastareader:
        seqname = record.name
        seq = record.sequence
        length = len(seq)

        index = int(len(seq) / 50)
        if index >= 30:
            index = 30

        length_distribution[index] += 1
        if length < stats[MIN_LENGTH][BEFORE]:
            stats[MIN_LENGTH][BEFORE] = length

        if length > stats[MAX_LENGTH][BEFORE]:
            stats[MAX_LENGTH][BEFORE] = length

        if length < min_length:
            stats[NUMSEQ_SHORTER][BEFORE] += 1

        stats[AVG_LENGTH][BEFORE] = stats[AVG_LENGTH][BEFORE] + length

        # filtering step disabled; previously: seqvalue = filter_sequence(seq)
        seqvalue = seq.upper()

        stats[NUMSEQ][BEFORE] += 1

        seqlen = len(seqvalue)
        if seqlen >= min_length:

            if len(lengths_str) > 100:
                fprintf(lengthsfile, "%s\n", lengths_str)
                lengths_str = str(seqlen)
            else:
                lengths_str += '\t' + str(seqlen)

            stats[NUMSEQ][AFTER] += 1
            stats[AVG_LENGTH][AFTER] = stats[AVG_LENGTH][AFTER] + seqlen
            if mapfile is None:
                fprintf(outfile, "%s\n", seqname)
            else:
                fprintf(outfile, ">%s\n", sample_name + '_' + str(seq_count))
                key = re.sub(r'^>', '', seqname)
                fprintf(
                    mapfile, "%s\n", sample_name + '_' + str(seq_count) +
                    '\t' + key + '\t' + str(seqlen))
                seq_count += 1

            fprintf(outfile, "%s\n", seqvalue)

            if seqlen < stats[MIN_LENGTH][AFTER]:
                stats[MIN_LENGTH][AFTER] = seqlen

            if seqlen > stats[MAX_LENGTH][AFTER]:
                stats[MAX_LENGTH][AFTER] = seqlen

    fprintf(lengthsfile, "%s\n", lengths_str)

    if stats[NUMSEQ][BEFORE] > 0:
        stats[AVG_LENGTH][
            BEFORE] = stats[AVG_LENGTH][BEFORE] / stats[NUMSEQ][BEFORE]
    else:
        stats[AVG_LENGTH][BEFORE] = 0
    if stats[NUMSEQ][AFTER] > 0:
        stats[AVG_LENGTH][
            AFTER] = stats[AVG_LENGTH][AFTER] / stats[NUMSEQ][AFTER]
    else:
        stats[AVG_LENGTH][AFTER] = 0

    lengthsfile.close()
    outfile.close()

    rename(opts.output_fasta + ".tmp", opts.output_fasta)
    rename(opts.lengths_file + ".tmp", opts.lengths_file)

    #inputfile.close()
    if mapfile is not None:
        mapfile.close()
    """ min length """
    if stats[MIN_LENGTH][BEFORE] == _MAX:
        stats[MIN_LENGTH][BEFORE] = 0
    if stats[MIN_LENGTH][AFTER] == _MAX:
        stats[MIN_LENGTH][AFTER] = 0

    fprintf(logfile, "@INFO\tBEFORE\tAFTER\n")
    fprintf(
        logfile, "%s\n", NUMSEQ + '\t' + str(stats[NUMSEQ][BEFORE]) + '\t' +
        str(stats[NUMSEQ][AFTER]))
    fprintf(
        logfile, "%s\n",
        NUMSEQ_SHORTER + '\t' + str(stats[NUMSEQ_SHORTER][BEFORE]) + '\t' +
        str(stats[NUMSEQ_SHORTER][AFTER]))
    fprintf(
        logfile, "%s\n", AVG_LENGTH + '\t' + str(stats[AVG_LENGTH][BEFORE]) +
        '\t' + str(stats[AVG_LENGTH][AFTER]))
    fprintf(
        logfile, "%s\n", MIN_LENGTH + '\t' + str(stats[MIN_LENGTH][BEFORE]) +
        '\t' + str(stats[MIN_LENGTH][AFTER]))
    fprintf(
        logfile, "%s\n", MAX_LENGTH + '\t' + str(stats[MAX_LENGTH][BEFORE]) +
        '\t' + str(stats[MAX_LENGTH][AFTER]))
    fprintf(logfile, "@INFO\tLOW\tHIGH\tFREQUENCY\tCUMULATIVE_FREQUENCY\n")
    #    fprintf(logfile, "#   ---\t-----\t--------\t---------\t----------\n");

    i = 30
    length_cumulative_distribution[i] = length_distribution[i]
    i -= 1
    while i >= 0:
        length_cumulative_distribution[i] = length_cumulative_distribution[
            i + 1] + length_distribution[i]
        i -= 1

    for i in range(0, 31):
        fprintf(logfile, "   %s\n", str(i*50) + '\t' + str((i+1)*50) + '\t' +\
                 str(length_distribution[i]) +'\t' + str(length_cumulative_distribution[i]) )

    logfile.close()

    if opts.seqtype == 'nucleotide':
        priority = 1000
    else:
        priority = 2000

    if runstatslogger is not None:
        if opts.seqtype == 'nucleotide':
            runstatslogger.write(
                "%s\tNumber of sequences in input file BEFORE QC (%s)\t%s\n" %
                (str(priority), opts.seqtype, str(stats[NUMSEQ][BEFORE])))
            runstatslogger.write(
                "%s\t-min length\t%s\n" %
                (str(priority + 1), str(stats[MIN_LENGTH][BEFORE])))
            runstatslogger.write(
                "%s\t-avg length\t%s\n" %
                (str(priority + 2), str(int(stats[AVG_LENGTH][BEFORE]))))
            runstatslogger.write(
                "%s\t-max length\t%s\n" %
                (str(priority + 3), str(stats[MAX_LENGTH][BEFORE])))
            runstatslogger.write(
                "%s\t-total base pairs (bp)\t%s\n" %
                (str(priority + 4),
                 str(int(stats[AVG_LENGTH][BEFORE] * stats[NUMSEQ][BEFORE]))))

            runstatslogger.write(
                "%s\tNumber of sequences AFTER QC (%s)\t%s\n" %
                (str(priority + 5), opts.seqtype, str(stats[NUMSEQ][AFTER])))
            runstatslogger.write(
                "%s\t-min length\t%s\n" %
                (str(priority + 6), str(stats[MIN_LENGTH][AFTER])))
            runstatslogger.write(
                "%s\t-avg length\t%s\n" %
                (str(priority + 7), str(int(stats[AVG_LENGTH][AFTER]))))
            runstatslogger.write(
                "%s\t-max length\t%s\n" %
                (str(priority + 8), str(stats[MAX_LENGTH][AFTER])))
            runstatslogger.write(
                "%s\t-total base pairs (bp)\t%s\n" %
                (str(priority + 9),
                 str(int(stats[AVG_LENGTH][AFTER] * stats[NUMSEQ][AFTER]))))
        else:
            runstatslogger.write(
                "%s\tNumber of translated ORFs BEFORE QC (%s)\t%s\n" %
                (str(priority), opts.seqtype, str(stats[NUMSEQ][BEFORE])))
            runstatslogger.write(
                "%s\t-min length\t%s\n" %
                (str(priority + 1), str(stats[MIN_LENGTH][BEFORE])))
            runstatslogger.write(
                "%s\t-avg length\t%s\n" %
                (str(priority + 2), str(int(stats[AVG_LENGTH][BEFORE]))))
            runstatslogger.write(
                "%s\t-max length\t%s\n" %
                (str(priority + 3), str(stats[MAX_LENGTH][BEFORE])))
            runstatslogger.write(
                "%s\t-total base pairs (bp)\t%s\n" %
                (str(priority + 4),
                 str(int(stats[AVG_LENGTH][BEFORE] * stats[NUMSEQ][BEFORE]))))
            runstatslogger.write(
                "%s\tNumber of tranlated ORFs AFTER QC (%s)\t%s\n" %
                (str(priority + 5), opts.seqtype, str(stats[NUMSEQ][AFTER])))
            runstatslogger.write(
                "%s\t-min length\t%s\n" %
                (str(priority + 6), str(stats[MIN_LENGTH][AFTER])))
            runstatslogger.write(
                "%s\t-avg length\t%s\n" %
                (str(priority + 7), str(int(stats[AVG_LENGTH][AFTER]))))
            runstatslogger.write(
                "%s\t-max length\t%s\n" %
                (str(priority + 8), str(stats[MAX_LENGTH][AFTER])))
            runstatslogger.write(
                "%s\t-total base pairs (bp)\t%s\n" %
                (str(priority + 9),
                 str(int(stats[AVG_LENGTH][AFTER] * stats[NUMSEQ][AFTER]))))
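
The option strings accepted by the module-level parser are not shown in these examples; a driver script would typically just forward the command line, as in this sketch.

import sys

# Hypothetical entry point: forward command-line arguments to main().
if __name__ == "__main__":
    main(sys.argv[1:])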