Example 1
def multiSplit(files, globs):
    # Given a file and a number of splits (here, the number of processes), this function splits
    # the file into chunks with (nearly) equal numbers of lines.
    import math
    import os

    file_info = files[1]
    RC.printWrite(globs['logfilename'], globs['log-v'],
                  "+ Making tmp directory: " + globs['tmpdir'])
    os.makedirs(globs['tmpdir'])
    # Make the temporary directory to store the split files and split outputs.

    new_files = {}
    # The dictionary for the new temporary files.

    tmpfiles = [
        os.path.join(globs['tmpdir'],
                     str(i) + "-chunk.txt") for i in range(globs['num-procs'])
    ]
    # Generate the names of the tmp input files.

    num_lines = RC.getFileLen(file_info['in'])
    linespersplit = int(math.ceil(num_lines / float(globs['num-procs'])))
    # Count the number of lines in the input file and get the number of lines per split.
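    # (e.g. a 1,003-line input split across 4 processes gives ceil(1003 / 4) = 251
    # lines per chunk, so the final chunk holds the remaining 250 lines)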

    with RC.getFileReader(file_info['in'])(file_info['in'], "r") as infile:
        file_lines, file_num = 0, 0
        tmpfile = open(tmpfiles[file_num], "w")
        for line in infile:
            tmpfile.write(line)
            file_lines += 1
            if file_lines == linespersplit:
                tmpfile.close()
                newoutfile = os.path.join(globs['tmpdir'],
                                          str(file_num) + "-chunk-out.txt")
                new_files[file_num] = {
                    'in': tmpfiles[file_num],
                    'out': newoutfile
                }
                file_lines = 0
                file_num += 1
                if file_num != len(tmpfiles):
                    tmpfile = open(tmpfiles[file_num], "w")
    # Read through every line in the input file and write it to one of the sub-files, moving on to
    # the next sub-file once we've reached the number of lines per split.

    if len(new_files) != len(tmpfiles):
        tmpfile.close()
        newoutfile = os.path.join(globs['tmpdir'],
                                  str(file_num) + "-chunk-out.txt")
        new_files[file_num] = {
            'in': tmpfiles[file_num],
            'out': newoutfile
        }
    # If the last file has fewer lines than the rest, it won't get added in the loop, so we add it here.

    return new_files
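
For reference, a self-contained sketch of the same splitting idea without the RC logging helpers. It trades the streaming read above for an in-memory readlines(), so it is only suitable for inputs that fit in memory; the names here are illustrative, not from the source.

import math
import os

def split_by_lines(in_path, tmpdir, num_chunks):
    # Write num_chunks files of (nearly) equal line counts and return their paths.
    os.makedirs(tmpdir, exist_ok=True)
    with open(in_path) as infile:
        lines = infile.readlines()
    per_chunk = math.ceil(len(lines) / num_chunks)
    chunk_paths = []
    for i in range(num_chunks):
        path = os.path.join(tmpdir, str(i) + "-chunk.txt")
        with open(path, "w") as out:
            out.writelines(lines[i * per_chunk:(i + 1) * per_chunk])
        chunk_paths.append(path)
    return chunk_paths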
Example 2
def mergeFiles(outfile, files, globs):
    # This function merges the tmp output files back into the main output file.
    import shutil
    with open(outfile, "w") as out:
        for file_num in sorted(files.keys()):
            with open(files[file_num]['out']) as infile:
                for line in infile:
                    out.write(line)
    try:
        RC.printWrite(globs['logfilename'], globs['log-v'],
                      "+ Removing tmp directory and files: " + globs['tmpdir'])
        shutil.rmtree(globs['tmpdir'])
    except OSError:
        RC.printWrite(
            globs['logfilename'], globs['log-v'],
            "+ Could not remove tmp directory and files. User can remove manually: "
            + globs['tmpdir'])
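
Since the merge simply concatenates the chunk outputs in chunk-number order, a buffer-based variant using shutil.copyfileobj avoids the per-line loop. A sketch, assuming the chunk outputs need no per-line processing on the way through:

import shutil

def merge_files_fast(outfile, files):
    # Concatenate each chunk's output in sorted chunk-number order,
    # streaming whole buffers instead of individual lines.
    with open(outfile, "wb") as out:
        for file_num in sorted(files.keys()):
            with open(files[file_num]['out'], "rb") as infile:
                shutil.copyfileobj(infile, out)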
Example 3
def startProg(globs):
    # A nice way to start the program.
    start_v = 1

    print("#")
    RC.printWrite(
        globs['logfilename'], 0,
        "# Welcome to Referee -- Reference genome quality score calculator.")
    RC.printWrite(
        globs['logfilename'], start_v, "# Version " + globs['version'] +
        " released on " + globs['releasedate'])
    RC.printWrite(globs['logfilename'], start_v,
                  "# Referee was developed by Gregg Thomas and Matthew Hahn")
    RC.printWrite(globs['logfilename'], start_v,
                  "# Citation:      " + globs['doi'])
    RC.printWrite(globs['logfilename'], start_v,
                  "# Website:       " + globs['http'])
    RC.printWrite(globs['logfilename'], start_v,
                  "# Report issues: " + globs['github'])
    RC.printWrite(globs['logfilename'], start_v, "#")
    RC.printWrite(globs['logfilename'], start_v,
                  "# The date and time at the start is: " + RC.getDateTime())
    RC.printWrite(
        globs['logfilename'], start_v,
        "# Using Python version:              " + globs['pyver'] + "\n#")
    RC.printWrite(globs['logfilename'], start_v,
                  "# The program was called as: " + " ".join(sys.argv) + "\n#")

    pad = 20
    RC.printWrite(globs['logfilename'], start_v, "# " + "-" * 125)
    RC.printWrite(globs['logfilename'], start_v, "# INPUT/OUTPUT INFO")
    RC.printWrite(globs['logfilename'], start_v,
                  RC.spacedOut("# Input file:", pad) + globs['in-file'])
    RC.printWrite(globs['logfilename'], start_v,
                  RC.spacedOut("# Reference file:", pad) + globs['ref-file'])
    RC.printWrite(globs['logfilename'], start_v,
                  RC.spacedOut("# Output directory:", pad) + globs['out-dir'])
    RC.printWrite(globs['logfilename'], start_v,
                  RC.spacedOut("# Output prefix:", pad) + globs['out-prefix'])

    RC.printWrite(globs['logfilename'], start_v, "# " + "-" * 125)
    RC.printWrite(globs['logfilename'], start_v, "# OPTIONS INFO")
    RC.printWrite(
        globs['logfilename'], start_v,
        RC.spacedOut("# Option", pad) + RC.spacedOut("Current setting", pad) +
        "Current action")
    RC.printWrite(globs['logfilename'], start_v, "# " + "-" * 125)

    if globs['pileup-opt']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --pileup", pad) + RC.spacedOut("True", pad) +
            "Input type set to pileup. Referee will calculate genotype likelihoods."
        )
        if globs['mapq-opt']:
            RC.printWrite(
                globs['logfilename'], start_v,
                RC.spacedOut("# --mapq", pad) + RC.spacedOut("True", pad) +
                "Incorporating mapping qualities (7th column of pileup file) into quality score calculations if they are present."
            )
        else:
            RC.printWrite(
                globs['logfilename'], start_v,
                RC.spacedOut("# --mapq", pad) + RC.spacedOut("False", pad) +
                "Ignoring mapping qualities in pileup file if they are present."
            )
    else:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --pileup", pad) + RC.spacedOut("False", pad) +
            "Input is pre-calculated genotype log likelihoods.")
        if globs['mapq-opt']:
            RC.printWrite(
                globs['logfilename'], start_v,
                RC.spacedOut("# --mapq", pad) + RC.spacedOut("True", pad) +
                "--pileup not set. Ignoring --mapq option.")
    # Reporting the pileup option.

    if globs['fastq-opt']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --fastq", pad) + RC.spacedOut("True", pad) +
            "Writing output in FASTQ format in addition to tab delimited: " +
            globs['out-fq'])
    else:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --fastq", pad) + RC.spacedOut("False", pad) +
            "Not writing output in FASTQ format.")
    # Reporting the fastq option.

    if globs['fasta-opt']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --fasta", pad) + RC.spacedOut("True", pad) +
            "Writing corrected output in FASTA format in addition to tab delimited: "
            + globs['out-fq'])
    else:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --fasta", pad) + RC.spacedOut("False", pad) +
            "Not writing corrected output in FASTA format.")
    # Reporting the fasta option.

    if globs['bed-opt']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --bed", pad) + RC.spacedOut("True", pad) +
            "Writing output in BED format in addition to tab delimited: " +
            globs['bed-dir'])
        # Specify and create the BED directory, if necessary.
    else:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --bed", pad) + RC.spacedOut("False", pad) +
            "Not writing output in BED format.")
    # Reporting the bed option.

    if globs['mapped-only-opt']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --mapped", pad) + RC.spacedOut("True", pad) +
            "Only calculating scores for positions with reads mapped to them.")
    else:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --mapped", pad) + RC.spacedOut("False", pad) +
            "Calculating scores for every position in the reference genome.")
    # Reporting the mapped option.

    if globs['haploid-opt']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --haploid", pad) + RC.spacedOut("True", pad) +
            "Calculating genotype likelihoods and quality scores for HAPLOID data (4 genotypes)."
        )
    else:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --haploid", pad) + RC.spacedOut("False", pad) +
            "Calculating genotype likelihoods and quality scores for DIPLOID data (10 genotypes)."
        )
    # Reporting the haploid option.

    if globs['raw-opt']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --raw", pad) + RC.spacedOut("True", pad) +
            "Printing raw Referee score in fourth column of tabbed output.")
    else:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --raw", pad) + RC.spacedOut("False", pad) +
            "NOT printing raw Referee score in tabbed output.")
    # Reporting the raw option.

    if globs['correct-opt']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --correct", pad) + RC.spacedOut("True", pad) +
            "Suggesting higher scoring alternative base when reference score is negative or reference base is N."
        )
    else:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --correct", pad) + RC.spacedOut("False", pad) +
            "Not suggesting higher scoring alternative base when reference score is negative or reference base is N."
        )
    # Reporting the correct option.

    if not globs['quiet']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --quiet", pad) + RC.spacedOut("False", pad) +
            "Step info will be output while Referee is running.")
    else:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --quiet", pad) + RC.spacedOut("True", pad) +
            "No further information will be output while Referee is running.")
    # Reporting the quiet option.

    RC.printWrite(
        globs['logfilename'], start_v,
        RC.spacedOut("# -p", pad) +
        RC.spacedOut(str(globs['num-procs']), pad) +
        "Referee will use this many processes to run.")
    # Reporting the number of processes specified.

    RC.printWrite(
        globs['logfilename'], start_v,
        RC.spacedOut("# -l", pad) +
        RC.spacedOut(str(globs['lines-per-proc']), pad) +
        "This many lines will be read per process to be calculated at one time in parallel"
    )
    # Reporting the lines per proc option.

    if globs['allcalc-opt']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --allcalcs", pad) + RC.spacedOut("True", pad) +
            "Using tab delimited output and reporting extra columns.")
    # Reporting the allcalc option.

    if globs['debug']:
        RC.printWrite(
            globs['logfilename'], start_v,
            RC.spacedOut("# --debug", pad) + RC.spacedOut("True", pad) +
            "Printing out a bit of debug info.")
    # Reporting the debug option.

    if not globs['pileup-opt']:
        RC.printWrite(globs['logfilename'], start_v, "#\n# " + "-" * 40)
        RC.printWrite(globs['logfilename'], start_v, "## IMPORTANT!")
        RC.printWrite(
            globs['logfilename'], start_v,
            "## Input columns: Scaffold\tPosition\tAA\tAC\tAG\tAT\tCC\tCG\tCT\tGG\tGT\tTT"
        )
        RC.printWrite(
            globs['logfilename'], start_v,
            "## Please ensure that your input genotype likelihood files are tab delimited with columns in this exact order without headers."
        )
        RC.printWrite(
            globs['logfilename'], start_v,
            "## Failure to do so will result in inaccurate calculations!!")
        RC.printWrite(globs['logfilename'], start_v, "# " + "-" * 40 + "\n#")

    if globs['quiet']:
        RC.printWrite(globs['logfilename'], start_v, "# " + "-" * 125)
        RC.printWrite(globs['logfilename'], start_v, "# Running...")
Example 4
def referee(globs):
    step_start_time = RC.report_step(globs, "", "", "", start=True)
    # Initialize the step headers

    step = "Detecting compression"
    step_start_time = RC.report_step(globs, step, False, "In progress...")
    globs['reader'] = RC.getFileReader(globs['in-file'])
    if globs['reader'] != open:
        globs['lread'] = RC.readGzipLine
        globs['read-mode'] = "rb"
    step_start_time = RC.report_step(globs, step, step_start_time, "Success!")
    # Detect whether the input file is gzip compressed or not and save the appropriate functions.

    step = "Indexing reference FASTA"
    step_start_time = RC.report_step(globs, step, False, "In progress...")
    globs['ref'], prev_scaff = RC.fastaReadInd(globs['ref-file'], globs)
    step_start_time = RC.report_step(globs, step, step_start_time, "Success!")
    # Index the reference FASTA file.

    if globs['ref-index']:
        step = "Getting scaffold lengths from index"
        step_start_time = RC.report_step(globs, step, False, "In progress...")
        with open(globs['ref-index']) as idxfile:
            for line in idxfile:
                line = line.split("\t")
                globs['scaff-lens'][line[0]] = int(line[1])
    else:
        RC.printWrite(
            globs['logfilename'], globs['log-v'],
            "# WARNING 1: Cannot find reference index file (" +
            globs['ref-file'] + ".fai)")
        RC.printWrite(
            globs['logfilename'], globs['log-v'],
            "# WARNING 1: Will read reference scaffold lengths manually, which can take a few minutes."
        )
        step = "Getting scaffold lengths manually"
        step_start_time = RC.report_step(globs, step, False, "In progress...")
        for scaff in globs['ref']:
            seq = RC.fastaGet(globs['ref-file'], globs['ref'][scaff])[1]
            globs['scaff-lens'][scaff] = len(seq)
    step_start_time = RC.report_step(globs, step, step_start_time, "Success!")
    globs['num-pos'] = sum(globs['scaff-lens'].values())
    globs['num-scaff'] = len(globs['scaff-lens'])
    RC.printWrite(
        globs['logfilename'], globs['log-v'],
        "# Read " + str(globs['num-pos']) + " positions in " +
        str(globs['num-scaff']) + " scaffolds")
    # Getting scaffold lengths

    if globs['pileup-opt']:
        step = "Computing likelihood look-up table"
        step_start_time = RC.report_step(globs, step, False, "In progress...")
        globs['probs'] = CALC.glInit(globs['mapq-opt'], globs['haploid-opt'])
        step_start_time = RC.report_step(globs, step, step_start_time,
                                         "Success!")
    # Pre-compute the likelihoods for every base-quality (+ mapping quality if set) so they can
    # just be looked up for each position.

    with open(globs['out-tab'], "w") as outfile, \
            mp.Pool(processes=globs['num-procs']) as pool:
        if globs['fastq-opt']:
            fastqfile = open(globs['out-fq'], "w")
        else:
            fastqfile = ""
        # Open the FASTQ file if --fastq is specified. Otherwise just set an empty string instead of the stream.

        if globs['fasta-opt']:
            fastafile = open(globs['out-fa'], "w")
        else:
            fastafile = ""
        # Open the FASTA file if --fasta is specified. Otherwise just set an empty string instead of the stream.

        if globs['bed-opt']:
            for line in globs['reader'](globs['in-file'], globs['read-mode']):
                first_scaff = globs['lread'](line)[0]
                break
            globs['cur-bed'] = OUT.initializeBed(first_scaff, globs)
        # Initialize first scaffold for BED output.

        cur_lines = []
        i, i_start, next_pos = 0, 1, 1
        for line in globs['reader'](globs['in-file'], globs['read-mode']):
            i += 1
            cur_lines.append(line)
            if len(cur_lines) == globs['chunk-size']:
                step = "Processing lines " + str(i_start) + "-" + str(i)
                step_start_time = RC.report_step(globs, step, False,
                                                 "In progress...")

                i_start = i + 1
                line_chunks = list(
                    RC.chunks(cur_lines, globs['lines-per-proc']))
                for result in pool.starmap(CALC.refCalc,
                                           ((line_chunk, globs)
                                            for line_chunk in line_chunks)):
                    for outdict in result:
                        prev_scaff, next_pos, globs = OUT.outputDistributor(
                            outdict, prev_scaff, next_pos, outfile, fastqfile,
                            fastafile, globs)
                cur_lines = []
                step_start_time = RC.report_step(globs, step, step_start_time,
                                                 "Success!")
        # Read the input file line by line. Once a certain number of lines have been read, pass them to siteParse in parallel.

        if cur_lines != []:
            step = "Processing lines " + str(i_start) + "-" + str(i)
            step_start_time = RC.report_step(globs, step, False,
                                             "In progress...")

            line_chunks = list(RC.chunks(cur_lines, globs['lines-per-proc']))
            for result in pool.starmap(CALC.refCalc,
                                       ((line_chunk, globs)
                                        for line_chunk in line_chunks)):
                for outdict in result:
                    prev_scaff, next_pos, globs = OUT.outputDistributor(
                        outdict, prev_scaff, next_pos, outfile, fastqfile,
                        fastafile, globs)
            step_start_time = RC.report_step(globs, step, step_start_time,
                                             "Success!")
        # Process the last chunk of lines if there are any left over.

        if next_pos <= globs['scaff-lens'][prev_scaff]:
            step = "Filling final unmapped positions"
            step_start_time = RC.report_step(globs, step, False,
                                             "In progress...")
            seq = RC.fastaGet(globs['ref-file'], globs['ref'][prev_scaff])[1]
            outdict = {
                'scaff': prev_scaff,
                'pos': globs['scaff-lens'][prev_scaff],
                'ref': seq[next_pos - 1],
                'rq': -2,
                'raw': "NA",
                'lr': "NA",
                'l_match': "NA",
                'l_mismatch': "NA",
                'gls': "NA",
                'cor_ref': "NA",
                'cor_score': "NA",
                'cor_raw': "NA"
            }
            prev_scaff, next_pos, globs = OUT.outputDistributor(
                outdict, prev_scaff, next_pos, outfile, fastqfile, fastafile,
                globs)
            step_start_time = RC.report_step(globs, step, step_start_time,
                                             "Success!")
        # If the last positions are unmapped they won't have been filled in. Do that here using the last position (length) of the
        # previous scaffold as the outdict.

        globs['scaffs-written'].append(prev_scaff)

        step = "Checking for unmapped scaffolds"
        step_start_time = RC.report_step(globs, step, False, "In progress...")
        for scaff in globs['scaff-lens']:
            if scaff not in globs['scaffs-written']:
                scaff_len = globs['scaff-lens'][scaff]
                seq = RC.fastaGet(globs['ref-file'], globs['ref'][scaff])[1]
                for p in range(len(seq)):
                    pos = p + 1
                    ref = seq[p]
                    outdict = {
                        'scaff': scaff,
                        'pos': pos,
                        'ref': ref,
                        'rq': -2,
                        'raw': "NA",
                        'lr': "NA",
                        'l_match': "NA",
                        'l_mismatch': "NA",
                        'gls': "NA",
                        'cor_ref': "NA",
                        'cor_score': "NA",
                        'cor_raw': "NA"
                    }
                    prev_scaff, next_pos, globs = OUT.outputDistributor(
                        outdict, scaff, next_pos, outfile, fastqfile,
                        fastafile, globs)
        step_start_time = RC.report_step(globs, step, step_start_time,
                                         "Success!")
        # If any scaffolds had no positions with reads mapped, they will not have been written. Go through them here to write out
        # their positions in output files with scores of -2.

        if globs['fastq-opt']:
            fastqfile.close()
        # Close the FASTQ file if --fastq was set.

        if globs['fasta-opt']:
            fastafile.close()
        # Close the FASTA file if --fasta was set.

        if globs['bed-opt']:
            step = "Writing final bed file"
            step_start_time = RC.report_step(globs, step, False,
                                             "In progress...")
            OUT.outputBed(globs['cur-bed'])
            step_start_time = RC.report_step(globs, step, step_start_time,
                                             "Success!")
        # Write out the last bed file.

        with open(globs['out-summary'], "w") as sumout:
            sumout.write("# SCAFFOLDS:\t" + str(globs['num-scaff']) + "\n")
            sumout.write("# POSITIONS:\t" + str(globs['num-pos']) + "\n")
            sumout.write("# UNMAPPED POSITIONS:\t" +
                         str(globs['hist'][2]['count']) + "\n")
            if globs['correct-opt']:
                sumout.write("# ERRORS CORRECTED:\t" +
                             str(globs['num-corrected']) + "\n")
                err_rate = globs['num-corrected'] / globs['num-pos']
                sumout.write("# ERROR RATE PER BASE:\t" + str(err_rate) + "\n")

                sumout.write("#\n# ERROR TYPES\n")
                sumout.write("from\tto\tcount\n")
                for err in globs['err-types']:
                    outline = err[0] + "\t" + err[1] + "\t" + str(
                        globs['err-types'][err])
                    sumout.write(outline + "\n")

            sumout.write("#\n# SCORE DISTRIBUTION\n")
            sumout.write("bin\tcount\n")
            for score_bin in globs['hist']:
                outline = [
                    globs['hist'][score_bin]['min'],
                    globs['hist'][score_bin]['max'],
                    globs['hist'][score_bin]['count']
                ]
                outline = [str(o) for o in outline]
                if outline[0] == outline[1]:
                    outline = outline[0] + "\t" + outline[2]
                else:
                    outline = outline[0] + "-" + outline[1] + "\t" + outline[2]
                sumout.write(outline + "\n")
        # Write the summary file.

    return
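
The main loop above batches chunk-size lines, slices each batch into lines-per-proc pieces with RC.chunks, and fans the slices out through Pool.starmap. A self-contained sketch of that fan-out pattern, with a stand-in worker in place of CALC.refCalc:

import multiprocessing as mp

def chunks(seq, n):
    # Yield successive n-sized slices of seq (mirrors what RC.chunks
    # presumably does).
    for i in range(0, len(seq), n):
        yield seq[i:i + n]

def worker(line_chunk, tag):
    # Stand-in for CALC.refCalc: produce one result per input line.
    return [(tag, line.strip()) for line in line_chunk]

if __name__ == "__main__":
    lines = ["scaff1\t" + str(i) + "\t..." for i in range(1, 11)]
    with mp.Pool(processes=2) as pool:
        for result in pool.starmap(worker,
                                   ((chunk, "demo")
                                    for chunk in chunks(lines, 3))):
            for item in result:
                print(item)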
Example 5
def referee(files, globs, step_start_time):
    import shutil
    if globs['stats']:
        if globs['psutil']:
            import psutil
        step_start_time = RC.report_stats(globs,
                                          "Index ref fasta",
                                          step_start=step_start_time)
    # Initialize the stats output if --stats is set

    globs['ref'] = RC.fastaReadInd(globs['reffile'])
    # Index the reference FASTA file.

    if globs['pileup']:
        if globs['stats']:
            step_start_time = RC.report_stats(globs,
                                              "GL Init",
                                              step_start=step_start_time)
        globs['probs'] = CALC.glInit(globs['mapq'], globs['haploid'])

    if globs['stats']:
        file_start_time = RC.report_stats(globs,
                                          "Calcs",
                                          step_start=step_start_time)
    # --stats update.

    if globs['num-procs'] == 1:
        for file_num in files:
            result = CALC.refCalc((file_num, files[file_num], globs))
            if globs['stats']:
                step_start_time = RC.report_stats(
                    globs, "File " + str(result) + " calcs done",
                    file_start_time)
    # The serial version.
    else:
        if len(files) == 1:
            if globs['stats']:
                step_start_time = RC.report_stats(globs,
                                                  "Split files",
                                                  step_start=step_start_time)
            new_files = OP.multiSplit(files, globs)
        else:
            new_files = files
        # If multiple processors are available for 1 file, we split the file into chunks.

        pool = mp.Pool(processes=globs['num-procs'])
        if globs['stats'] and globs['psutil']:
            for result in pool.imap(RC.getSubPID, range(globs['num-procs'])):
                globs['pids'].append(result)
        for result in pool.imap(CALC.refCalc,
                                ((file_num, new_files[file_num], globs)
                                 for file_num in new_files)):
            if globs['stats']:
                step_start_time = RC.report_stats(
                    globs, "File " + str(result) + " calcs done",
                    file_start_time)
        # Creates the pool of processes and passes each file to one process to calculate scores on.

        if len(files) == 1:
            if globs['stats']:
                step_start_time = RC.report_stats(globs,
                                                  "Merge files",
                                                  step_start=step_start_time)
            OP.mergeFiles(files[1]['out'], new_files, globs)
        # Merges the split tmp files back into a single output file.
    # The parallel version.

    if globs['stats']:
        file_start_time = RC.report_stats(globs,
                                          "Adding unmapped ",
                                          step_start=step_start_time)
    if not globs['mapped']:
        if globs['num-procs'] == 1 or len(files) == 1:
            for file_num in files:
                result = OUT.addUnmapped((file_num, files[file_num], globs))
                if globs['stats']:
                    step_start_time = RC.report_stats(
                        globs,
                        "File " + str(result) + " unmapped done",
                        step_start=file_start_time)
                    RC.printWrite(
                        globs['logfilename'], globs['log-v'],
                        "+ Renaming tmp file to output file: " +
                        files[result]['tmpfile'] + " -> " +
                        files[result]['out'])
                shutil.move(files[result]['tmpfile'], files[result]['out'])
        # Serial version to add unmapped sites.
        else:
            for result in pool.imap(OUT.addUnmapped,
                                    ((file_num, files[file_num], globs)
                                     for file_num in new_files)):
                if globs['stats']:
                    step_start_time = RC.report_stats(
                        globs,
                        "File " + str(result) + " unmapped done",
                        step_start=file_start_time)
                RC.printWrite(
                    globs['logfilename'], globs['log-v'],
                    "+ Renaming tmp file to output file: " +
                    files[result]['tmpfile'] + " -> " + files[result]['out'])
                shutil.move(files[result]['tmpfile'], files[result]['out'])

        # Parallel version to add unmapped sites.

    # If all positions are to be assigned a score, this fills in the unmapped positions. Requires one pass through of the output file.

    if globs['stats']:
        step_start_time = RC.report_stats(globs,
                                          "End program",
                                          step_start=step_start_time,
                                          stat_end=True)
    # A step update for --stats.

    return
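
Unlike Example 4, this version parallelizes over whole files: one (file_num, file_info, globs) tuple is passed per file through Pool.imap, and results stream back as each worker finishes. A self-contained sketch of that dispatch, with a stand-in worker in place of CALC.refCalc:

import multiprocessing as mp

def per_file_worker(args):
    # Stand-in for CALC.refCalc: unpack the (file_num, file_info, settings)
    # tuple, do the per-file work, and return the file number so the caller
    # can report progress.
    file_num, file_info, settings = args
    return file_num

if __name__ == "__main__":
    demo_files = {0: {'in': "0-chunk.txt", 'out': "0-chunk-out.txt"},
                  1: {'in': "1-chunk.txt", 'out': "1-chunk-out.txt"}}
    settings = {'num-procs': 2}
    with mp.Pool(processes=settings['num-procs']) as pool:
        for file_num in pool.imap(per_file_worker,
                                  ((n, demo_files[n], settings)
                                   for n in demo_files)):
            print("File", file_num, "calcs done")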