Ejemplo n.º 1
0
def _run_minimap(reference_file, reads_files, num_proc, mode, out_file,
                 sam_output):
    """Run minimap2 on the given reads and write the alignment to out_file.

    The command is assembled as a shell pipeline and executed through
    ``/bin/bash`` with ``pipefail`` enabled, so a failure in any stage of
    the pipeline is reported as a failure of the whole command.

    Args:
        reference_file: path to the reference sequence file.
        reads_files: list of paths to the read files.
        num_proc: number of threads passed to minimap2 (``-t``).
        mode: minimap2 preset passed to ``-x``.
        out_file: path the pipeline's standard output is written to.
        sam_output: if true, minimap2 emits SAM, which is piped through
            ``samtools view`` and ``samtools sort`` to produce BAM;
            otherwise minimap2's default PAF output is written as is.

    Raises:
        AlignmentException: if the pipeline exits with a non-zero status or
            an OSError occurs while opening the output files, running the
            command or removing the stderr log.
    """
    work_dir = os.path.dirname(out_file)
    stderr_file = os.path.join(work_dir, "minimap.stderr")
    SORT_THREADS = "4"
    #use a larger per-thread sort buffer for references above 100 MiB
    SORT_MEM = "4G" if os.path.getsize(
        reference_file) > 100 * 1024 * 1024 else "1G"

    cmdline = [MINIMAP_BIN, reference_file]
    cmdline.extend(reads_files)
    cmdline.extend(["-x", mode, "-t", str(num_proc)])

    #Produces sorted BAM. Since it's not sorted by read name anymore,
    #it's important that all reads have SEQ.
    #If sam_output is not set, produces PAF alignment (minimap2 default)
    #a = SAM output, p = min primary-to-secondary score
    #N = max secondary alignments
    #--sam-hit-only = don't output unmapped reads
    #--secondary-seq = custom option to output SEQ for secondary alignment with hard clipping
    #-L: move CIGAR strings for ultra-long reads to the separate tag
    #-Q don't output fastq quality
    if sam_output:
        #timestamped prefix for samtools sort temporary files
        tmp_prefix = os.path.join(
            work_dir,
            "sort_" + datetime.datetime.now().strftime("%y%m%d_%H%M%S"))
        cmdline.extend([
            "-a", "-p", "0.5", "-N", "10", "--sam-hit-only", "-L", "-Q",
            "--secondary-seq"
        ])
        cmdline.extend(
            ["|", SAMTOOLS_BIN, "view", "-T", reference_file, "-u", "-"])
        cmdline.extend([
            "|", SAMTOOLS_BIN, "sort", "-T", tmp_prefix, "-O", "bam", "-@",
            SORT_THREADS, "-l", "1", "-m", SORT_MEM
        ])

    #NOTE: arguments are joined with spaces and interpreted by bash without
    #any quoting, so paths containing whitespace or shell metacharacters
    #will not be passed through correctly.
    shell_cmd = "set -o pipefail; " + " ".join(cmdline)

    try:
        #context managers guarantee both handles are closed (also on
        #failure) before the stderr log is inspected or removed
        with open(stderr_file, "w") as stderr_handle, \
                open(out_file, "w") as stdout_handle:
            subprocess.check_call(["/bin/bash", "-c", shell_cmd],
                                  stderr=stderr_handle,
                                  stdout=stdout_handle)
        #the log is only kept when the run fails
        os.remove(stderr_file)

    except (subprocess.CalledProcessError, OSError) as e:
        logger.error(
            "Error running minimap2, terminating. See the alignment error log "
            " for details: " + stderr_file)
        raise AlignmentException(str(e))