Ejemplo n.º 1
0
def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    config = data["config"]
    out_prefix = os.path.join(align_dir, names["lane"])
    out_file = out_prefix + "Aligned.out.sam"
    out_dir = os.path.join(align_dir, "%s_star" % names["lane"])

    final_out = os.path.join(out_dir, "{0}.bam".format(names["sample"]))
    if file_exists(final_out):
        return final_out
    star_path = config_utils.get_program("STAR", config)
    fastq = " ".join([fastq_file, pair_file]) if pair_file else fastq_file
    num_cores = config["algorithm"].get("num_cores", 1)

    safe_makedir(align_dir)
    cmd = ("{star_path} --genomeDir {ref_file} --readFilesIn {fastq} "
           "--runThreadN {num_cores} --outFileNamePrefix {out_prefix} "
           "--outReadsUnmapped Fastx --outFilterMultimapNmax 10 "
           "--outStd SAM "
           "--outSAMunmapped Within --outSAMattributes %s" % " ".join(ALIGN_TAGS))
    cmd = cmd + " --readFilesCommand zcat " if is_gzipped(fastq_file) else cmd
    cmd += _read_group_option(names)
    fusion_mode = get_in(data, ("config", "algorithm", "fusion_mode"), False)
    if fusion_mode:
        cmd += " --chimSegmentMin 15 --chimJunctionOverhangMin 15"
    strandedness = get_in(data, ("config", "algorithm", "strandedness"),
                          "unstranded").lower()
    if strandedness == "unstranded":
        cmd += " --outSAMstrandField intronMotif "
    sam_to_bam = bam.sam_to_bam_stream_cmd(config)
    sort = bam.sort_cmd(config)
    cmd += "| {sam_to_bam} | {sort} -o {tx_final_out} "
    run_message = "Running STAR aligner on %s and %s." % (fastq_file, ref_file)
    with file_transaction(final_out) as tx_final_out:
        do.run(cmd.format(**locals()), run_message, None)
    return final_out
Ejemplo n.º 2
0
def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    config = data["config"]
    out_prefix = os.path.join(align_dir, dd.get_lane(data))
    out_file = out_prefix + "Aligned.out.sam"
    out_dir = os.path.join(align_dir, "%s_star" % dd.get_lane(data))

    if not ref_file:
        logger.error(
            "STAR index not found. We don't provide the STAR indexes "
            "by default because they are very large. You can install "
            "the index for your genome with: bcbio_nextgen.py upgrade "
            "--aligners star --genomes genome-build-name --data")
        sys.exit(1)

    final_out = os.path.join(out_dir, "{0}.bam".format(names["sample"]))
    if file_exists(final_out):
        data = _update_data(final_out, out_dir, names, data)
        return data

    star_path = config_utils.get_program("STAR", config)
    fastq = " ".join([fastq_file, pair_file]) if pair_file else fastq_file
    num_cores = config["algorithm"].get("num_cores", 1)

    safe_makedir(align_dir)
    cmd = ("{star_path} --genomeDir {ref_file} --readFilesIn {fastq} "
           "--runThreadN {num_cores} --outFileNamePrefix {out_prefix} "
           "--outReadsUnmapped Fastx --outFilterMultimapNmax 10 "
           "--outStd SAM "
           "--outSAMunmapped Within --outSAMattributes %s" %
           " ".join(ALIGN_TAGS))
    cmd = cmd + " --readFilesCommand zcat " if is_gzipped(fastq_file) else cmd
    cmd += _read_group_option(names)
    fusion_mode = utils.get_in(data, ("config", "algorithm", "fusion_mode"),
                               False)
    if fusion_mode:
        cmd += " --chimSegmentMin 15 --chimJunctionOverhangMin 15"
    strandedness = utils.get_in(data, ("config", "algorithm", "strandedness"),
                                "unstranded").lower()
    if strandedness == "unstranded":
        cmd += " --outSAMstrandField intronMotif "

    if dd.get_rsem(data) and not is_transcriptome_broken():
        cmd += " --quantMode TranscriptomeSAM "

    with tx_tmpdir(data) as tmp_dir:
        sam_to_bam = bam.sam_to_bam_stream_cmd(config)
        sort = bam.sort_cmd(config, tmp_dir)
        cmd += "| {sam_to_bam} | {sort} -o {tx_final_out} "
        run_message = "Running STAR aligner on %s and %s" % (fastq_file,
                                                             ref_file)
        with file_transaction(data, final_out) as tx_final_out:
            do.run(cmd.format(**locals()), run_message, None)

    data = _update_data(final_out, out_dir, names, data)
    return data
Ejemplo n.º 3
0
def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    config = data["config"]
    out_prefix = os.path.join(align_dir, dd.get_lane(data))
    out_file = out_prefix + "Aligned.out.sam"
    out_dir = os.path.join(align_dir, "%s_star" % dd.get_lane(data))

    if not ref_file:
        logger.error("STAR index not found. We don't provide the STAR indexes "
                     "by default because they are very large. You can install "
                     "the index for your genome with: bcbio_nextgen.py upgrade "
                     "--aligners star --genomes genome-build-name --data")
        sys.exit(1)

    final_out = os.path.join(out_dir, "{0}.bam".format(names["sample"]))
    if file_exists(final_out):
        data = _update_data(final_out, out_dir, names, data)
        return data

    star_path = config_utils.get_program("STAR", config)
    fastq = " ".join([fastq_file, pair_file]) if pair_file else fastq_file
    num_cores = config["algorithm"].get("num_cores", 1)

    safe_makedir(align_dir)
    cmd = ("{star_path} --genomeDir {ref_file} --readFilesIn {fastq} "
           "--runThreadN {num_cores} --outFileNamePrefix {out_prefix} "
           "--outReadsUnmapped Fastx --outFilterMultimapNmax 10 "
           "--outStd SAM "
           "--outSAMunmapped Within --outSAMattributes %s" % " ".join(ALIGN_TAGS))
    cmd = cmd + " --readFilesCommand zcat " if is_gzipped(fastq_file) else cmd
    cmd += _read_group_option(names)
    fusion_mode = utils.get_in(data, ("config", "algorithm", "fusion_mode"), False)
    if fusion_mode:
        cmd += " --chimSegmentMin 15 --chimJunctionOverhangMin 15"
    strandedness = utils.get_in(data, ("config", "algorithm", "strandedness"),
                                "unstranded").lower()
    if strandedness == "unstranded":
        cmd += " --outSAMstrandField intronMotif "

    if dd.get_rsem(data) and not is_transcriptome_broken():
        cmd += " --quantMode TranscriptomeSAM "

    with tx_tmpdir(data) as tmp_dir:
        sam_to_bam = bam.sam_to_bam_stream_cmd(config)
        sort = bam.sort_cmd(config, tmp_dir)
        cmd += "| {sam_to_bam} | {sort} -o {tx_final_out} "
        run_message = "Running STAR aligner on %s and %s" % (fastq_file, ref_file)
        with file_transaction(data, final_out) as tx_final_out:
            do.run(cmd.format(**locals()), run_message, None)

    data = _update_data(final_out, out_dir, names, data)
    return data
Ejemplo n.º 4
0
def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    config = data["config"]
    out_prefix = os.path.join(align_dir, names["lane"])
    out_file = out_prefix + "Aligned.out.sam"
    out_dir = os.path.join(align_dir, "%s_star" % names["lane"])

    final_out = os.path.join(out_dir, "{0}.bam".format(names["sample"]))
    if file_exists(final_out):
        return final_out
    star_path = config_utils.get_program("STAR", config)
    fastq = " ".join([fastq_file, pair_file]) if pair_file else fastq_file
    num_cores = config["algorithm"].get("num_cores", 1)

    safe_makedir(align_dir)
    cmd = ("{star_path} --genomeDir {ref_file} --readFilesIn {fastq} "
           "--runThreadN {num_cores} --outFileNamePrefix {out_prefix} "
           "--outReadsUnmapped Fastx --outFilterMultimapNmax 10 "
           "--outStd SAM "
           "--outSAMunmapped Within --outSAMattributes %s" %
           " ".join(ALIGN_TAGS))
    cmd = cmd + " --readFilesCommand zcat " if is_gzipped(fastq_file) else cmd
    cmd += _read_group_option(names)
    fusion_mode = utils.get_in(data, ("config", "algorithm", "fusion_mode"),
                               False)
    if fusion_mode:
        cmd += " --chimSegmentMin 15 --chimJunctionOverhangMin 15"
    strandedness = utils.get_in(data, ("config", "algorithm", "strandedness"),
                                "unstranded").lower()
    if strandedness == "unstranded":
        cmd += " --outSAMstrandField intronMotif "
    with tx_tmpdir(data) as tmp_dir:
        sam_to_bam = bam.sam_to_bam_stream_cmd(config)
        sort = bam.sort_cmd(config, tmp_dir)
        cmd += "| {sam_to_bam} | {sort} -o {tx_final_out} "
        run_message = "Running STAR aligner on %s and %s." % (fastq_file,
                                                              ref_file)
        with file_transaction(data, final_out) as tx_final_out:
            do.run(cmd.format(**locals()), run_message, None)
    return final_out