def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Align reads with STAR and stream the output into a BAM file.

    STAR is asked to write SAM to stdout (``--outStd SAM``); that stream is
    piped through the commands returned by ``bam.sam_to_bam_stream_cmd`` and
    ``bam.sort_cmd``. If the final BAM already exists, nothing is run.

    Args:
        fastq_file: First FASTQ input. When ``is_gzipped`` reports it as
            compressed, ``--readFilesCommand zcat`` is added.
        pair_file: Second FASTQ input for paired reads; falsy for single-end.
        ref_file: Value passed to STAR as ``--genomeDir``.
        names: Mapping providing ``"lane"`` and ``"sample"``; it is also
            handed to ``_read_group_option``.
        align_dir: Base directory for the outputs; passed to ``safe_makedir``
            before STAR runs.
        data: Sample dictionary; ``data["config"]`` supplies program and
            algorithm settings.

    Returns:
        Path of the final BAM file
        (``<align_dir>/<lane>_star/<sample>.bam``).
    """
    config = data["config"]
    lane = names["lane"]
    out_prefix = os.path.join(align_dir, lane)
    out_dir = os.path.join(align_dir, "%s_star" % lane)
    final_out = os.path.join(out_dir, "{0}.bam".format(names["sample"]))
    if file_exists(final_out):
        return final_out

    star_path = config_utils.get_program("STAR", config)
    fastq = " ".join([fastq_file, pair_file]) if pair_file else fastq_file
    num_cores = config["algorithm"].get("num_cores", 1)
    safe_makedir(align_dir)

    # The {placeholders} stay unresolved until the single .format() call at
    # the end, once the transactional output path is known.
    cmd = ("{star_path} --genomeDir {ref_file} --readFilesIn {fastq} "
           "--runThreadN {num_cores} --outFileNamePrefix {out_prefix} "
           "--outReadsUnmapped Fastx --outFilterMultimapNmax 10 "
           "--outStd SAM "
           "--outSAMunmapped Within --outSAMattributes %s"
           % " ".join(ALIGN_TAGS))
    if is_gzipped(fastq_file):
        cmd += " --readFilesCommand zcat "
    cmd += _read_group_option(names)

    fusion_mode = get_in(data, ("config", "algorithm", "fusion_mode"), False)
    if fusion_mode:
        cmd += " --chimSegmentMin 15 --chimJunctionOverhangMin 15"
    strandedness = get_in(data, ("config", "algorithm", "strandedness"),
                          "unstranded").lower()
    if strandedness == "unstranded":
        cmd += " --outSAMstrandField intronMotif "

    sam_to_bam = bam.sam_to_bam_stream_cmd(config)
    sort = bam.sort_cmd(config)
    cmd += "| {sam_to_bam} | {sort} -o {tx_final_out} "
    run_message = "Running STAR aligner on %s and %s." % (fastq_file, ref_file)
    with file_transaction(final_out) as tx_final_out:
        # Name every substituted value explicitly instead of passing
        # locals(), so the command's inputs are visible at the call site.
        do.run(cmd.format(star_path=star_path, ref_file=ref_file,
                          fastq=fastq, num_cores=num_cores,
                          out_prefix=out_prefix, sam_to_bam=sam_to_bam,
                          sort=sort, tx_final_out=tx_final_out),
               run_message, None)
    return final_out
def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Align reads with STAR, stream the output into a BAM and update ``data``.

    STAR is asked to write SAM to stdout (``--outStd SAM``); that stream is
    piped through the commands returned by ``bam.sam_to_bam_stream_cmd`` and
    ``bam.sort_cmd``. If the final BAM already exists, STAR is not run and
    only ``_update_data`` is applied.

    Args:
        fastq_file: First FASTQ input. When ``is_gzipped`` reports it as
            compressed, ``--readFilesCommand zcat`` is added.
        pair_file: Second FASTQ input for paired reads; falsy for single-end.
        ref_file: Value passed to STAR as ``--genomeDir``. A falsy value is
            treated as a missing index.
        names: Mapping providing ``"sample"``; it is also handed to
            ``_read_group_option`` and ``_update_data``.
        align_dir: Base directory for the outputs; passed to ``safe_makedir``
            before STAR runs.
        data: Sample dictionary; ``data["config"]`` supplies program and
            algorithm settings, and ``dd.get_lane(data)`` names the outputs.

    Returns:
        The result of ``_update_data(final_out, out_dir, names, data)``.

    Raises:
        SystemExit: With status 1 (after logging an error) when ``ref_file``
            is falsy.
    """
    config = data["config"]
    lane = dd.get_lane(data)
    out_prefix = os.path.join(align_dir, lane)
    out_dir = os.path.join(align_dir, "%s_star" % lane)
    if not ref_file:
        logger.error(
            "STAR index not found. We don't provide the STAR indexes "
            "by default because they are very large. You can install "
            "the index for your genome with: bcbio_nextgen.py upgrade "
            "--aligners star --genomes genome-build-name --data")
        sys.exit(1)

    final_out = os.path.join(out_dir, "{0}.bam".format(names["sample"]))
    if file_exists(final_out):
        data = _update_data(final_out, out_dir, names, data)
        return data

    star_path = config_utils.get_program("STAR", config)
    fastq = " ".join([fastq_file, pair_file]) if pair_file else fastq_file
    num_cores = config["algorithm"].get("num_cores", 1)
    safe_makedir(align_dir)

    # The {placeholders} stay unresolved until the single .format() call at
    # the end, once the transactional output path is known.
    cmd = ("{star_path} --genomeDir {ref_file} --readFilesIn {fastq} "
           "--runThreadN {num_cores} --outFileNamePrefix {out_prefix} "
           "--outReadsUnmapped Fastx --outFilterMultimapNmax 10 "
           "--outStd SAM "
           "--outSAMunmapped Within --outSAMattributes %s"
           % " ".join(ALIGN_TAGS))
    if is_gzipped(fastq_file):
        cmd += " --readFilesCommand zcat "
    cmd += _read_group_option(names)

    fusion_mode = utils.get_in(data, ("config", "algorithm", "fusion_mode"),
                               False)
    if fusion_mode:
        cmd += " --chimSegmentMin 15 --chimJunctionOverhangMin 15"
    strandedness = utils.get_in(data, ("config", "algorithm", "strandedness"),
                                "unstranded").lower()
    if strandedness == "unstranded":
        cmd += " --outSAMstrandField intronMotif "
    if dd.get_rsem(data) and not is_transcriptome_broken():
        cmd += " --quantMode TranscriptomeSAM "

    # The temporary directory handed to the sort command must stay alive
    # for the whole run, so the command executes inside this context.
    with tx_tmpdir(data) as tmp_dir:
        sam_to_bam = bam.sam_to_bam_stream_cmd(config)
        sort = bam.sort_cmd(config, tmp_dir)
        cmd += "| {sam_to_bam} | {sort} -o {tx_final_out} "
        run_message = ("Running STAR aligner on %s and %s"
                       % (fastq_file, ref_file))
        with file_transaction(data, final_out) as tx_final_out:
            # Name every substituted value explicitly instead of passing
            # locals(), so the command's inputs are visible at the call site.
            do.run(cmd.format(star_path=star_path, ref_file=ref_file,
                              fastq=fastq, num_cores=num_cores,
                              out_prefix=out_prefix, sam_to_bam=sam_to_bam,
                              sort=sort, tx_final_out=tx_final_out),
                   run_message, None)
    data = _update_data(final_out, out_dir, names, data)
    return data
def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Run the STAR aligner for one sample and record the result in ``data``.

    The STAR command writes SAM to stdout (``--outStd SAM``) and is piped
    into the commands returned by ``bam.sam_to_bam_stream_cmd`` and
    ``bam.sort_cmd``. When the final BAM is already present the alignment is
    skipped and only ``_update_data`` is applied.

    Args:
        fastq_file: First FASTQ input. ``--readFilesCommand zcat`` is added
            when ``is_gzipped`` reports the file as compressed.
        pair_file: Second FASTQ input for paired reads; falsy for single-end.
        ref_file: Value passed to STAR as ``--genomeDir``. A falsy value is
            treated as a missing index.
        names: Mapping providing ``"sample"``; also passed on to
            ``_read_group_option`` and ``_update_data``.
        align_dir: Base directory for the outputs; passed to ``safe_makedir``
            before STAR runs.
        data: Sample dictionary; ``data["config"]`` supplies program and
            algorithm settings, and ``dd.get_lane(data)`` names the outputs.

    Returns:
        The result of ``_update_data(final_out, out_dir, names, data)``.

    Raises:
        SystemExit: With status 1 (after logging an error) when ``ref_file``
            is falsy.
    """
    config = data["config"]
    lane = dd.get_lane(data)
    out_prefix = os.path.join(align_dir, lane)
    out_dir = os.path.join(align_dir, "%s_star" % lane)
    if not ref_file:
        logger.error("STAR index not found. We don't provide the STAR indexes "
                     "by default because they are very large. You can install "
                     "the index for your genome with: bcbio_nextgen.py upgrade "
                     "--aligners star --genomes genome-build-name --data")
        sys.exit(1)

    final_out = os.path.join(out_dir, "{0}.bam".format(names["sample"]))
    if file_exists(final_out):
        data = _update_data(final_out, out_dir, names, data)
        return data

    star_path = config_utils.get_program("STAR", config)
    fastq = " ".join([fastq_file, pair_file]) if pair_file else fastq_file
    num_cores = config["algorithm"].get("num_cores", 1)
    safe_makedir(align_dir)

    # Placeholders in braces are resolved by one .format() call at the end,
    # because the transactional output path is only known at that point.
    cmd = ("{star_path} --genomeDir {ref_file} --readFilesIn {fastq} "
           "--runThreadN {num_cores} --outFileNamePrefix {out_prefix} "
           "--outReadsUnmapped Fastx --outFilterMultimapNmax 10 "
           "--outStd SAM "
           "--outSAMunmapped Within --outSAMattributes %s"
           % " ".join(ALIGN_TAGS))
    if is_gzipped(fastq_file):
        cmd += " --readFilesCommand zcat "
    cmd += _read_group_option(names)

    fusion_mode = utils.get_in(data, ("config", "algorithm", "fusion_mode"),
                               False)
    if fusion_mode:
        cmd += " --chimSegmentMin 15 --chimJunctionOverhangMin 15"
    strandedness = utils.get_in(data, ("config", "algorithm", "strandedness"),
                                "unstranded").lower()
    if strandedness == "unstranded":
        cmd += " --outSAMstrandField intronMotif "
    if dd.get_rsem(data) and not is_transcriptome_broken():
        cmd += " --quantMode TranscriptomeSAM "

    # The sort command is given tmp_dir, so the pipeline has to run while
    # the temporary-directory context is still open.
    with tx_tmpdir(data) as tmp_dir:
        sam_to_bam = bam.sam_to_bam_stream_cmd(config)
        sort = bam.sort_cmd(config, tmp_dir)
        cmd += "| {sam_to_bam} | {sort} -o {tx_final_out} "
        run_message = ("Running STAR aligner on %s and %s"
                       % (fastq_file, ref_file))
        with file_transaction(data, final_out) as tx_final_out:
            # Explicit keyword arguments replace locals() so that only the
            # values the template actually needs are exposed to it.
            rendered = cmd.format(star_path=star_path, ref_file=ref_file,
                                  fastq=fastq, num_cores=num_cores,
                                  out_prefix=out_prefix,
                                  sam_to_bam=sam_to_bam, sort=sort,
                                  tx_final_out=tx_final_out)
            do.run(rendered, run_message, None)
    data = _update_data(final_out, out_dir, names, data)
    return data
def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Align reads with STAR and stream the output into a BAM file.

    STAR is asked to write SAM to stdout (``--outStd SAM``); that stream is
    piped through the commands returned by ``bam.sam_to_bam_stream_cmd`` and
    ``bam.sort_cmd``. If the final BAM already exists, nothing is run.

    Args:
        fastq_file: First FASTQ input. When ``is_gzipped`` reports it as
            compressed, ``--readFilesCommand zcat`` is added.
        pair_file: Second FASTQ input for paired reads; falsy for single-end.
        ref_file: Value passed to STAR as ``--genomeDir``.
        names: Mapping providing ``"lane"`` and ``"sample"``; it is also
            handed to ``_read_group_option``.
        align_dir: Base directory for the outputs; passed to ``safe_makedir``
            before STAR runs.
        data: Sample dictionary; ``data["config"]`` supplies program and
            algorithm settings. It is also passed to ``tx_tmpdir`` and
            ``file_transaction``.

    Returns:
        Path of the final BAM file
        (``<align_dir>/<lane>_star/<sample>.bam``).
    """
    config = data["config"]
    lane = names["lane"]
    out_prefix = os.path.join(align_dir, lane)
    out_dir = os.path.join(align_dir, "%s_star" % lane)
    final_out = os.path.join(out_dir, "{0}.bam".format(names["sample"]))
    if file_exists(final_out):
        return final_out

    star_path = config_utils.get_program("STAR", config)
    fastq = " ".join([fastq_file, pair_file]) if pair_file else fastq_file
    num_cores = config["algorithm"].get("num_cores", 1)
    safe_makedir(align_dir)

    # The {placeholders} stay unresolved until the single .format() call at
    # the end, once the transactional output path is known.
    cmd = ("{star_path} --genomeDir {ref_file} --readFilesIn {fastq} "
           "--runThreadN {num_cores} --outFileNamePrefix {out_prefix} "
           "--outReadsUnmapped Fastx --outFilterMultimapNmax 10 "
           "--outStd SAM "
           "--outSAMunmapped Within --outSAMattributes %s"
           % " ".join(ALIGN_TAGS))
    if is_gzipped(fastq_file):
        cmd += " --readFilesCommand zcat "
    cmd += _read_group_option(names)

    fusion_mode = utils.get_in(data, ("config", "algorithm", "fusion_mode"),
                               False)
    if fusion_mode:
        cmd += " --chimSegmentMin 15 --chimJunctionOverhangMin 15"
    strandedness = utils.get_in(data, ("config", "algorithm", "strandedness"),
                                "unstranded").lower()
    if strandedness == "unstranded":
        cmd += " --outSAMstrandField intronMotif "

    # The temporary directory handed to the sort command must stay alive
    # for the whole run, so the command executes inside this context.
    with tx_tmpdir(data) as tmp_dir:
        sam_to_bam = bam.sam_to_bam_stream_cmd(config)
        sort = bam.sort_cmd(config, tmp_dir)
        cmd += "| {sam_to_bam} | {sort} -o {tx_final_out} "
        run_message = ("Running STAR aligner on %s and %s."
                       % (fastq_file, ref_file))
        with file_transaction(data, final_out) as tx_final_out:
            # Name every substituted value explicitly instead of passing
            # locals(), so the command's inputs are visible at the call site.
            do.run(cmd.format(star_path=star_path, ref_file=ref_file,
                              fastq=fastq, num_cores=num_cores,
                              out_prefix=out_prefix, sam_to_bam=sam_to_bam,
                              sort=sort, tx_final_out=tx_final_out),
                   run_message, None)
    return final_out