def rmdup(align_file, out_file):
    """Write a duplicate-removed copy of ``align_file`` to ``out_file``.

    The alignment is streamed through ``samtools view``, ``samtools sort -n``
    and ``bammarkduplicates rmdup=1`` in a single shell pipeline. The
    pipeline writes to the temporary path handed out by ``file_transaction``.
    Nothing is executed when ``out_file`` already exists.

    :param align_file: path of the input alignment file.
    :param out_file: path of the de-duplicated output.
    :returns: ``out_file``.
    """
    if os.path.exists(out_file):
        return out_file

    # Prefix handed to ``samtools sort`` for its sorted output.
    sort_prefix = align_file + "_tmp"
    pipeline = ("samtools view -bh {align_file} | "
                "samtools sort -o -n - {tmp} | "
                "bammarkduplicates rmdup=1 O={tx_out_file}")
    with file_transaction(out_file) as tx_out_file:
        do.run(pipeline.format(align_file=align_file,
                               tmp=sort_prefix,
                               tx_out_file=tx_out_file))
    return out_file
def qc(sam_file):
    """Run ``fastqc`` in SAM mode on ``sam_file``.

    The report directory is named after the base name of ``sam_file`` with
    a ``_fastq`` suffix and is relative to the current working directory.
    If that directory already exists, fastqc is not run again.

    :param sam_file: path of the SAM file to inspect.
    :returns: the report directory name.
    """
    out_dir = os.path.basename(sam_file) + "_fastq"
    if os.path.exists(out_dir):
        return out_dir

    os.mkdir(out_dir)
    do.run("fastqc {sam_file} -f sam -o {out_dir}".format(
        sam_file=sam_file, out_dir=out_dir))
    return out_dir
def _bowtie_align(fastq_file, control_index, out_file):
    """Align ``fastq_file`` to a control index and collect flagstat numbers.

    ``bowtie2`` output (unaligned reads dropped via ``--no-unal``) is piped
    into ``samtools view`` and written through ``file_transaction``.
    ``samtools flagstat`` is then run on the finished ``out_file``.
    Both steps are skipped when ``out_file`` already exists.

    :param fastq_file: reads to align.
    :param control_index: bowtie2 index prefix passed to ``-x``.
    :param out_file: path of the alignment output.
    :returns: ``out_file + ".flagstat"``.
    """
    stat_file = out_file + ".flagstat"
    if os.path.exists(out_file):
        return stat_file

    align_cmd = ("bowtie2 -p 4 --no-unal -x {control_index} -U {fastq_file} | "
                 "samtools view -Shb /dev/stdin > {tx_out_file} ")
    with file_transaction(out_file) as tx_out_file:
        do.run(align_cmd.format(control_index=control_index,
                                fastq_file=fastq_file,
                                tx_out_file=tx_out_file),
               "bowtie2 %s" % fastq_file)

    # flagstat reads the final path, so it runs after the transaction closes.
    stats_cmd = "samtools flagstat {out_file} > {stat_file}".format(
        out_file=out_file, stat_file=stat_file)
    do.run(stats_cmd, "stats control sequences")
    return stat_file
def rmdup(align_file, out_file):
    """Produce ``out_file`` from ``align_file`` with duplicates removed.

    Runs a three-stage shell pipeline (``samtools view``, ``samtools sort
    -n``, ``bammarkduplicates rmdup=1``) whose final stage writes to the
    path provided by ``file_transaction``. An existing ``out_file`` is left
    untouched.

    :param align_file: input alignment path.
    :param out_file: destination path.
    :returns: ``out_file``.
    """
    stages = [
        "samtools view -bh {align_file}",
        "samtools sort -o -n - {tmp}",
        "bammarkduplicates rmdup=1 O={tx_out_file}",
    ]
    already_done = os.path.exists(out_file)
    if not already_done:
        with file_transaction(out_file) as tx_out_file:
            command = " | ".join(stages).format(
                align_file=align_file,
                tmp=align_file + "_tmp",
                tx_out_file=tx_out_file,
            )
            do.run(command)
    return out_file
def qc(data, args):
    """Run ``fastqc`` in SAM mode on the alignment stored in ``data``.

    The input path is read from ``data['align']``. The report directory is
    that path's base name plus ``_fastq``, relative to the current working
    directory. If the directory already exists, a skip message is logged
    instead of running fastqc.

    :param data: mapping that holds the ``'align'`` path.
    :param args: accepted but not used here.
    :returns: ``data``, unmodified.
    """
    sam_file = data['align']
    out_dir = os.path.basename(sam_file) + "_fastq"

    if os.path.exists(out_dir):
        logger.my_logger.info("%s has already been QC, skipping." % (sam_file))
        return data

    os.mkdir(out_dir)
    do.run("fastqc {sam_file} -f sam -o {out_dir}".format(
        sam_file=sam_file, out_dir=out_dir))
    return data
def qc(data, args):
    """Run ``fastqc`` on the file at ``data['r1_path']``.

    The report directory is the base name of ``data['sample_id']`` plus
    ``_fastqc``, relative to the current working directory. If it already
    exists, a skip message is logged and fastqc is not run.

    :param data: mapping that holds ``'r1_path'`` and ``'sample_id'``.
    :param args: accepted but not used here.
    :returns: ``data``, unmodified.
    """
    sam_file = data['r1_path']
    out_dir = os.path.basename(data["sample_id"]) + "_fastqc"

    if os.path.exists(out_dir):
        logger.my_logger.info("%s has already been QC, skipping." % (sam_file))
        return data

    os.mkdir(out_dir)
    fastqc_cmd = "fastqc {sam_file} -f sam -o {out_dir}".format(
        sam_file=sam_file, out_dir=out_dir)
    do.run(fastqc_cmd)
    return data
def _bowtie_align(fastq_file, control_index, out_file):
    """Map reads against the control index and write flagstat output.

    The ``bowtie2 | samtools view`` pipeline writes to the temporary path
    supplied by ``file_transaction``. ``samtools flagstat`` then summarises
    the finished ``out_file`` into ``out_file + ".flagstat"``. When
    ``out_file`` is already present neither command is run.

    :param fastq_file: reads to align.
    :param control_index: bowtie2 index prefix passed to ``-x``.
    :param out_file: path of the alignment output.
    :returns: the flagstat file path.
    """
    stat_file = out_file + ".flagstat"
    alignment_missing = not os.path.exists(out_file)

    if alignment_missing:
        pipeline = " | ".join([
            "bowtie2 -p 4 --no-unal -x {control_index} -U {fastq_file}",
            "samtools view -Shb /dev/stdin > {tx_out_file} ",
        ])
        with file_transaction(out_file) as tx_out_file:
            do.run(
                pipeline.format(
                    control_index=control_index,
                    fastq_file=fastq_file,
                    tx_out_file=tx_out_file,
                ),
                "bowtie2 %s" % fastq_file,
            )
        # Runs on the final path, once the transaction has completed.
        do.run(
            "samtools flagstat {out_file} > {stat_file}".format(
                out_file=out_file, stat_file=stat_file),
            "stats control sequences",
        )
    return stat_file
def star_align(data, args, fastq_path, out_prefix, opts=""):
    """Align ``fastq_path`` with STAR and clean the resulting SAM file.

    STAR is run only when ``out_prefix + "Aligned.out.sam"`` does not exist
    yet. ``clean_align`` is then called on that SAM file with
    ``out_prefix + "cleaned.sam"`` as its target.

    :param data: accepted but not used here.
    :param args: object providing ``cores_per_job`` and ``aligner_index``.
    :param fastq_path: reads, decompressed by STAR through ``zcat``.
    :param out_prefix: prefix for every file STAR writes.
    :param opts: extra text appended after the ``--outSAMattributes`` list.
    :returns: tuple ``(out_file, clean_file)``.
    """
    threads = args.cores_per_job
    genome_dir = args.aligner_index
    multimap_limit = MAX_BEST
    out_file = out_prefix + "Aligned.out.sam"

    if not os.path.exists(out_file):
        star_cmd = " ".join([
            "STAR --genomeDir {reference_prefix} --readFilesIn {fastq_path} --readFilesCommand zcat",
            "--runThreadN {cores} --outFileNamePrefix {out_prefix}",
            "--outFilterMultimapNmax {max_best}",
            "--outSAMattributes NH HI NM MD AS {opts}",
            "--outSAMstrandField intronMotif",
        ]).format(
            reference_prefix=genome_dir,
            fastq_path=fastq_path,
            cores=threads,
            out_prefix=out_prefix,
            max_best=multimap_limit,
            opts=opts,
        )
        do.run(star_cmd)

    clean_file = clean_align(out_file, out_prefix + "cleaned.sam")
    return out_file, clean_file
def star_align(fastq_path, reference_prefix, out_prefix, cores=1):
    """Align ``fastq_path`` with STAR unless the output SAM already exists.

    The output is ``out_prefix + "Aligned.out.sam"``. When ``file_exists``
    reports it is already there, a skip message is printed and STAR is not
    run.

    :param fastq_path: reads, decompressed by STAR through ``zcat``.
    :param reference_prefix: STAR genome directory.
    :param out_prefix: prefix for every file STAR writes.
    :param cores: value passed to ``--runThreadN``.
    :returns: the output SAM path.
    """
    multimap_limit = MAX_BEST
    out_file = out_prefix + "Aligned.out.sam"

    if not file_exists(out_file):
        template = " ".join([
            "STAR --genomeDir {reference_prefix} --readFilesIn {fastq_path} --readFilesCommand zcat",
            "--runThreadN {cores} --outFileNamePrefix {out_prefix}",
            "--outFilterMultimapNmax {max_best}",
            "--outSAMattributes NH HI NM MD AS",
            "--outSAMstrandField intronMotif",
        ])
        do.run(template.format(
            reference_prefix=reference_prefix,
            fastq_path=fastq_path,
            cores=cores,
            out_prefix=out_prefix,
            max_best=multimap_limit,
        ))
    else:
        print("%s has already been aligned, skipping." % (fastq_path))
    return out_file
def star_align(data, args, fastq_path, out_prefix, opts=""):
    """Align ``fastq_path`` with STAR into a coordinate-sorted BAM.

    STAR is run only when ``out_prefix + "Aligned.sortedByCoord.out.bam"``
    does not exist yet. ``clean_align`` is then called on that file with
    ``out_prefix + "cleaned.sam"`` as its target.

    :param data: accepted but not used here.
    :param args: object providing ``cores_per_job`` and ``aligner_index``.
    :param fastq_path: reads, decompressed by STAR through ``zcat``.
    :param out_prefix: prefix for every file STAR writes.
    :param opts: extra text appended after the ``--outSAMattributes`` list.
    :returns: tuple ``(out_file, clean_file)``.
    """
    threads = args.cores_per_job
    genome_dir = args.aligner_index
    multimap_limit = MAX_BEST
    out_file = out_prefix + "Aligned.sortedByCoord.out.bam"

    if not os.path.exists(out_file):
        # The command deliberately ends with a single space after {opts},
        # exactly as the string has always been built.
        template = (
            "STAR --genomeDir {reference_prefix}"
            " --readFilesIn {fastq_path}"
            " --readFilesCommand zcat"
            " --runThreadN {cores}"
            " --outFileNamePrefix {out_prefix}"
            " --outFilterMultimapNmax {max_best}"
            " --outSAMtype BAM SortedByCoordinate"
            " --outSAMattributes NH HI NM MD AS {opts} "
        )
        do.run(template.format(
            reference_prefix=genome_dir,
            fastq_path=fastq_path,
            cores=threads,
            out_prefix=out_prefix,
            max_best=multimap_limit,
            opts=opts,
        ))

    clean_file = clean_align(out_file, out_prefix + "cleaned.sam")
    return out_file, clean_file
def _cmd_counts(in_file, out_file, gtf_file, cores):
    """Count reads per feature with ``featureCounts``.

    The command is skipped when ``out_file`` already exists.

    :param in_file: alignment file to count.
    :param out_file: counts table passed to ``-o``.
    :param gtf_file: annotation passed to ``-a``.
    :param cores: thread count passed to ``-T``.
    :returns: ``in_file + ".featureCounts"`` (presumably the per-read
        report written because of ``-R``; confirm against the
        featureCounts version in use).
    """
    if os.path.exists(out_file):
        return in_file + ".featureCounts"

    template = "featureCounts -R -T {cores} -a {gtf_file} -o {out_file} {in_file}"
    do.run(template.format(cores=cores, gtf_file=gtf_file,
                           out_file=out_file, in_file=in_file))
    return in_file + ".featureCounts"
def _cmd_counts(in_file, out_file, gtf_file, cores):
    """Count reads per feature with ``featureCounts --primary``.

    The command is skipped when ``out_file`` already exists.

    :param in_file: alignment file to count.
    :param out_file: counts table passed to ``-o``.
    :param gtf_file: annotation passed to ``-a``.
    :param cores: thread count passed to ``-T``.
    :returns: ``in_file + ".featureCounts"`` (presumably the per-read
        report written because of ``-R``; confirm against the
        featureCounts version in use).
    """
    counts_missing = not os.path.exists(out_file)
    if counts_missing:
        count_cmd = " ".join([
            "featureCounts -R -T {cores} --primary",
            "-a {gtf_file}",
            "-o {out_file}",
            "{in_file}",
        ]).format(cores=cores, gtf_file=gtf_file,
                  out_file=out_file, in_file=in_file)
        do.run(count_cmd)
    return in_file + ".featureCounts"