def external_script(_command):
    """Run ``_command`` via ``shell`` with selected caller variables exported.

    Every name listed in ``VARS_TO_PASS`` is looked up in the calling
    frame's locals.  ``int`` values are stored unchanged; any other value
    is stored as ``list(value.allitems())``.  The resulting mapping is
    written as JSON to a temporary file whose path is published in
    ``os.environ[PARAMETER_PASSING_ENVVAR]`` while the command runs and
    removed from the environment afterwards.
    """
    import inspect
    import json
    import tempfile

    VARS_TO_PASS = '******'.split()

    callerlocal = inspect.currentframe().f_back.f_locals
    callerglobal = inspect.currentframe().f_back.f_globals

    packed = {}
    for var in VARS_TO_PASS:
        packed[var] = (
            callerlocal[var]
            if isinstance(callerlocal[var], int)
            else list(callerlocal[var].allitems())
        )

    with tempfile.NamedTemporaryFile(mode='wt') as tmpfile:
        json.dump(packed, tmpfile)
        # Make sure the JSON is on disk before the child process reads it.
        tmpfile.flush()

        os.environ[PARAMETER_PASSING_ENVVAR] = tmpfile.name
        try:
            # Merge the caller's namespace into this frame's locals dict;
            # presumably so that shell() can resolve names from the
            # caller's scope when formatting the command (TODO confirm).
            locals().update(callerglobal)
            locals().update(callerlocal)
            shell(_command)
        finally:
            del os.environ[PARAMETER_PASSING_ENVVAR]
def get_cuda_arch():
    '''
    Report which supported NVIDIA GPU architectures are present.

    The output of ``lspci -nn`` is searched for the PCI device IDs listed
    in the ``MAXWELL_DEVICES``, ``KEPLER_DEVICES`` and ``FERMI_DEVICES``
    files (one tab-separated ``model<TAB>pciid`` pair per line).  Every
    architecture with at least one matching ID is returned, ordered as in
    ``supported_archs``; the list is empty when nothing matches.

    If your GPU is not detected correctly, update the *_DEVICES files by
    referring to https://pci-ids.ucw.cz/v2.2/pci.ids and make a pull
    request!
    '''
    pci_list = str(shell('lspci -nn', read=True))
    supported_archs = ['MAXWELL', 'KEPLER', 'FERMI']
    devtypes_found = set()

    for devtype in supported_archs:
        with open(devtype + '_DEVICES', 'r') as f:
            for raw in f:
                entry = raw.strip()
                if not entry:
                    continue
                # Each entry must have exactly two tab-separated fields.
                _model, pciid = entry.split('\t')
                if pciid.replace('0x', '') in pci_list:
                    devtypes_found.add(devtype)

    # sorted() already yields an empty list for an empty set.
    return sorted(devtypes_found, key=supported_archs.index)
def fasta_postprocess(origfn, newfn):
    """
    Concatenate the FASTA files of a tarball into one gzipped FASTA.

    The fasta from UCSC comes as a tarball of fastas, so all members are
    extracted into ``<tarball>.tmp`` and every ``*.fa`` file found directly
    in that directory is appended, in sorted order, to ``newfn``.

    Parameters
    ----------
    origfn : list
        Single-element list holding the path of the tarball.
    newfn : str
        Path of the gzip-compressed FASTA to write.

    Raises
    ------
    AssertionError
        If ``origfn`` is not a list of length one.
    """
    import os
    import shutil

    assert (
        (isinstance(origfn, list)) and (len(origfn) == 1)
    ), 'unexpected input: %s' % origfn
    origfn = origfn[0]
    tmpdir = origfn + '.tmp'

    os.makedirs(tmpdir, exist_ok=True)
    try:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use this with tarballs from a trusted source.
        with tarfile.open(origfn) as t:
            t.extractall(tmpdir)

        with gzip.open(newfn, 'wt') as fout:
            for fa in sorted(glob.glob(tmpdir + '/*.fa')):
                print(fa)
                # Stream each file so its handle is closed promptly and
                # large chromosomes are not held in memory at once.
                with open(fa) as fin:
                    shutil.copyfileobj(fin, fout)
    finally:
        # Remove the scratch directory even when extraction or writing fails.
        shutil.rmtree(tmpdir)
def rscript(string, scriptname, log=None):
    """
    Write an R script to disk and execute it with ``Rscript``.

    Parameters
    ----------
    string : str
        Filled-in template to be written as R script
    scriptname : str
        File to save script to
    log : str
        File to redirect stdout and stderr to. If None (or otherwise
        falsy), no redirection occurs.
    """
    with open(scriptname, 'w') as fout:
        fout.write(string)

    # Redirection suffix appended to the command; empty when no log is given.
    _log = '> {0} 2>&1'.format(log) if log else ""
    shell('Rscript {scriptname} {_log}')
from snakemake.shell import shell
from os import path

log = snakemake.log_fmt_shell(stdout=True, stderr=True)


def inputCmd(subsets):
    """Build (and print) the ``-a ... -b ...`` argument pair.

    ``subsets[0]`` supplies the value for ``-a`` and ``subsets[1]`` the
    value for ``-b``.
    """
    cond_a_files, cond_b_files = subsets[0], subsets[1]
    input_cmd = "-a {} -b {}".format(cond_a_files, cond_b_files)
    print(input_cmd)
    return input_cmd


# Values interpolated into the command line below.
quant_subsets = snakemake.params.quant_subsets
input_cmd = inputCmd(quant_subsets)
output_path = snakemake.params.output_path
min_cov = snakemake.params.min_cov
min_sam = snakemake.params.min_sam

shell(
    "whippet-delta.jl {input_cmd} "
    "-o {output_path} -r {min_cov} -s {min_sam} "
    "{log}"
)
# list to a string, here we detect single-end reads by checking if input.fastq
# is a string.
if not isinstance(snakemake.input.fastq, str):
    # Paired-end: exactly two FASTQ files are expected.
    assert len(snakemake.input.fastq) == 2
    fastqs = '-1 {0} -2 {1} '.format(*snakemake.input.fastq)
else:
    fastqs = '-U {0} '.format(snakemake.input.fastq)

# Derive the index prefix from the index files, which are named
#
#   prefix.N.ht2
#
# with N in [1-8]: drop the trailing ".N.ht2" from each file name and
# require that all files agree on the remaining prefix.
#
prefixes = list({'.'.join(x.split('.')[:-2]) for x in snakemake.input.index})
assert len(prefixes) == 1, 'Multiple prefixes detected from "{0}"'.format(snakemake.input.index)
prefix = prefixes[0]

shell(
    "hisat2 -x {prefix} {fastqs} "
    "--threads {snakemake.threads} {extra} "
    "-S {snakemake.output}.sam {log}"
)

# Convert the intermediate SAM to BAM, then delete the SAM.
shell(
    "samtools view -Sb {snakemake.output}.sam > {snakemake.output} "
    "&& rm {snakemake.output}.sam"
)
__author__ = "Jusitn fear"
__copyright__ = "Copyright 2016, Justin Fear"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra command-line arguments; empty when the rule defines none.
extra = getattr(snakemake.params, "extra", "")

# Send stdout and stderr to the rule's log file when one is configured.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

shell(
    "picard MarkDuplicates I={snakemake.input.bam} O={snakemake.output.bam} "
    "{extra} M={snakemake.output.metrics} {log}"
)
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2016, Sebastian Kurscheid"
__email__ = "*****@*****.**"
__license__ = "MIT"
__date__ = "2016-09-14"
__version__ = 0.1

from snakemake.shell import shell

# Run "macs2 callpeak" from the directory given by params.macs2_dir with
# --nomodel and a fixed extension size; results go to the rule's output
# directory, named after the "sample" wildcard.
shell("""
{snakemake.params.macs2_dir}/macs2 callpeak -B \
    -t {snakemake.input.chip} \
    -c {snakemake.input.input} \
    -n {snakemake.wildcards.sample} \
    -f {snakemake.params.format} \
    --nomodel \
    --extsize {snakemake.params.extsize} \
    --outdir {snakemake.output}
""")
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Index the first input file, writing the index to the first output file;
# the rule's params are passed through as extra arguments.
shell(
    "samtools index {snakemake.params} "
    "{snakemake.input[0]} {snakemake.output[0]}"
)
# Directories derived from the first log file and the first output file.
log_dir = os.path.dirname(snakemake.log[0])
output_dir = os.path.dirname(snakemake.output[0])

# sample basename: file name of the first BAM without its last extension
basename = os.path.splitext(os.path.basename(snakemake.input.bam[0]))[0]

# One task index per thread, handed to GNU parallel after ":::".
split_inputs = " ".join(map(str, range(int(snakemake.threads))))

with tempfile.TemporaryDirectory() as tmp_dir:
    # Three stages run in one subshell: make_examples (via parallel),
    # dv_call_variants.py, and postprocess_variants.  Intermediate files
    # live in the temporary directory.
    shell(
        "(BIN_DIR=$(ls -d $CONDA_DEFAULT_ENV/share/deepvariant*/binaries/Deepvariant/*/DeepVariant*) \n"
        "parallel --eta --halt 2 --joblog {log_dir}/log --res {log_dir} "
        "python $BIN_DIR/make_examples.zip "
        "--mode calling --ref {snakemake.input.ref} --reads {snakemake.input.bam} "
        "--examples {tmp_dir}/{basename}.tfrecord@{snakemake.threads}.gz "
        "--gvcf {tmp_dir}/{basename}.gvcf.tfrecord@{snakemake.threads}.gz "
        "--task {{}} ::: {split_inputs} \n"
        "dv_call_variants.py --cores {snakemake.threads} "
        "--outfile {tmp_dir}/{basename}.tmp --sample {basename} "
        "--examples {tmp_dir} --model {snakemake.params.model} \n"
        "python $BIN_DIR/postprocess_variants.zip "
        "--ref {snakemake.input.ref} --infile {tmp_dir}/{basename}.tmp "
        "--outfile {snakemake.output.vcf} "
        "--nonvariant_site_tfrecord_path {tmp_dir}/{basename}.gvcf.tfrecord@{snakemake.threads}.gz "
        "--gvcf_outfile {snakemake.output.gvcf} ) {log}"
    )
from snakemake.shell import shell

# Three redirection snippets: the default one, and two appending ones that
# capture only stdout and only stderr respectively.
initial_log = snakemake.get_log()
stdout_log = snakemake.get_log(stderr=False, append=True)
stderr_log = snakemake.get_log(stdout=False, append=True)

# Copy input to output, then emit messages through each redirection.
shell('''
    cat {snakemake.input} > {snakemake.output}
    echo "should not appear since next line truncates" {initial_log}
    echo "first line" {initial_log}
    (>&2 echo "a stderr message") {stderr_log}
    (echo "a stdout message") {stdout_log}
''')
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Samtools takes *additional* threads through its option -@, so one
# thread is reserved for the main process and only the remainder is
# passed on.  With a single thread the option is omitted entirely.
if snakemake.threads <= 1:
    threads = ""
else:
    threads = " -@ {} ".format(snakemake.threads - 1)

shell(
    "samtools index {threads} {snakemake.params} "
    "{snakemake.input[0]} {snakemake.output[0]} {log}"
)
__author__ = "David Laehnemann, Victoria Sack"
__copyright__ = "Copyright 2018, David Laehnemann, Victoria Sack"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# First output path without its extension (not used by the command below).
prefix = os.path.splitext(snakemake.output[0])[0]

# Convert the first input BAM to FASTQ on stdout, redirected into the
# first output file.
shell(
    "samtools bam2fq {snakemake.params}  -@ {snakemake.threads} "
    " {snakemake.input[0]} >{snakemake.output[0]} "
)
__author__ = "Adrien Leger"
__copyright__ = "Copyright 2019, Adrien Leger"
__email__ = "*****@*****.**"
__license__ = "MIT"
__version__ = "0.0.1"

# Imports
from snakemake.shell import shell

# Values interpolated into the command line
opt = snakemake.params.get("opt", "")  # optional extra arguments
bam_input = snakemake.input.bam
bam_output = snakemake.output.bam

# Run the tool; "&>" sends both stdout and stderr to the log file.
shell(
    "pyBioTools Alignment Split {opt} "
    "-i {bam_input} -l {bam_output} --verbose &> {snakemake.log}"
)
"""Snakemake wrapper for ProSolo single-cell-bulk calling""" __author__ = "David Lähnemann" __copyright__ = "Copyright 2020, David Lähnemann" __email__ = "*****@*****.**" __license__ = "MIT" from snakemake.shell import shell log = snakemake.log_fmt_shell(stdout=True, stderr=True) shell( "( prosolo single-cell-bulk " "--omit-indels " " {snakemake.params.extra} " "--candidates {snakemake.input.candidates} " "--output {snakemake.output} " "{snakemake.input.single_cell} " "{snakemake.input.bulk} " "{snakemake.input.ref} ) " "{log} " )
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Pipe an uncompressed BCF pileup straight into "bcftools call"; stderr of
# the whole pipeline goes to the log file.
shell(
    "(samtools mpileup {snakemake.params.mpileup} {snakemake.input.samples} "
    "--fasta-ref {snakemake.input.ref} --BCF --uncompressed | bcftools call "
    "-m {snakemake.params.call} -o {snakemake.output[0]} -v -) 2> {snakemake.log}"
)
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)

# Turn every input into an "INPUT=<path>" argument; a bare string is
# treated as a single input.
bams = snakemake.input
if isinstance(bams, str):
    bams = [bams]
bams = ["INPUT={}".format(bam) for bam in bams]

# Argument order: java options, user-defined parameters, input bam(s),
# output bam, output metrics, logging.
shell(
    "picard MarkDuplicates {java_opts} {extra} {bams} "
    "OUTPUT={snakemake.output.bam} "
    "METRICS_FILE={snakemake.output.metrics} "
    "{log}"
)
import os

log = snakemake.log_fmt_shell(stdout=True, stderr=True)
target = snakemake.params.target

if target['catg'] == "expr":
    # Single region: extract it, sort it and index the result.
    ref = target['ref']
    start = target['start']
    end = target['end']
    bamfile = snakemake.input.bam
    # mkstemp() creates the file securely; the deprecated mktemp() only
    # returns a name that another process could claim first.
    _fd, tmpfile = tempfile.mkstemp()
    os.close(_fd)
    try:
        shell(
            "samtools view -b {bamfile} {ref}:{start}-{end} > {tmpfile} ;"
            "samtools sort {tmpfile} > {snakemake.output[0]} ;"
            "samtools index {snakemake.output[0]} "
        )
    finally:
        # The unsorted intermediate is not needed once this branch is done.
        os.remove(tmpfile)

if target['catg'] == "fusion":
    # Two regions (r1 and r2) taken from the chimeric alignment file.
    ref1 = target['r1']['ref']
    start1 = target['r1']['start']
    end1 = target['r1']['end']
    bamfile = snakemake.input.chimeric
    ref2 = target['r2']['ref']
    start2 = target['r2']['start']
    end2 = target['r2']['end']
    # NOTE(review): the code that uses and cleans up this file lies
    # outside the visible chunk.
    _fd, tmpfile = tempfile.mkstemp()
    os.close(_fd)
# Normalise the reads to a list and validate their number.
if isinstance(fastq, str):
    fastq = [fastq]
if len(fastq) > 2:
    raise RuleInputException(
        'Your sequencing read should be single-read or paired-end.')

# Anything other than exactly two files is handled as single-end.
single_flag = '--single' if len(fastq) != 2 else ''
if single_flag and not (fragment_length != '' and standard_deviation != ''):
    raise RuleParameterException(
        'Please provide fragment length(-l) and standard deviation(-s) parameter for single-end reads.'
    )
fastq = ' '.join(fastq)

index = snakemake.input.index
threads = snakemake.threads
# Results are written to the directory containing the first output file.
output_directory = path.dirname(snakemake.output[0])

# Execute shell command.
shell(
    "(kallisto quant -i {index} -o {output_directory} -t {threads} "
    "{fragment_length} {standard_deviation} {single_flag} {extra} {fastq})"
    "{log}"
)
__author__ = "Max Cummins"
__copyright__ = "Copyright 2021, Max Cummins"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell
from os import path

# Only stderr goes to the log; stdout carries the statistics themselves.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

shell(
    "assembly-stats {snakemake.params.extra} {snakemake.input.assembly}"
    " > {snakemake.output.assembly_stats} {log}"
)
__author__ = "Jack Zhu"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Load the STAR module and run STAR-Fusion on Chimeric.out.junction from
# the working directory, using the job's /lscratch directory as tmpdir.
shell("""
module load STAR/2.5.1b
STAR-Fusion --genome_lib_dir {snakemake.params.genome_lib_dir} \
    -J Chimeric.out.junction \
    --output_dir {snakemake.params.star_fusion_outdir} \
    --tmpdir /lscratch/${{SLURM_JOBID}}
""")
# Extract parameters.
extra = snakemake.params.get('extra', '')

# Extract required inputs.
input_file = snakemake.input[0]
input_command = '-i %s' % input_file

# Extract required outputs.
output_file = snakemake.output[0]
output_command = '-o %s' % output_file

# Extract user parameters.
user_parameters = [
    optionify_params('gsize', '--gsize'),
    optionify_params('tsize', '--tsize'),
    optionify_params('pvalue', '--pvalue'),
    optionify_params('keep_dup', '--keep_dup'),
    optionify_params('verbose', '--verbose'),
]
# Keep only the options that produced a value.  The previous condition
# (`not p != ''`) was inverted: it kept exactly the empty entries, so no
# user parameter ever reached the command line.
user_parameters = ' '.join([p for p in user_parameters if p])

# Execute shell command.
shell("("
      "macs2 filterdup "
      "{input_command} "
      "{output_command} "
      "{user_parameters} "
      "{extra} "
      ") "
      "{log}")
# Optional extra command-line arguments; empty when the rule defines none.
extra = getattr(snakemake.params, "extra", "")

# Send stdout and stderr to the rule's log file when one is configured.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# Derive the index prefix from the output files, which are named
#
#   prefix.N.bt2
#
# with N in [1-4]: drop the trailing ".N.bt2" from each file name and
# require that all files agree on the remaining prefix.
#
prefixes = list({'.'.join(x.split('.')[:-2]) for x in snakemake.output})
assert len(prefixes) == 1, 'Multiple prefixes detected from "{0}"'.format(snakemake.output)
prefix = prefixes[0]

shell(
    "bowtie2-build --threads {snakemake.threads} {extra} "
    "{snakemake.input} {prefix} {log}"
)
__author__ = "Adrien Leger"
__copyright__ = "Copyright 2019, Adrien Leger"
__email__ = "*****@*****.**"
__license__ = "MIT"
__version__ = "0.0.3"

# Imports
from snakemake.shell import shell
import os

# Values interpolated into the command line
opt = snakemake.params.get("opt", "")  # optional extra arguments
ref = snakemake.input.ref
index_dir = snakemake.output.index_dir

# The index directory is created up front if it does not exist yet.
os.makedirs(index_dir, exist_ok=True)

# Run the tool; "&>" sends both stdout and stderr to the log file.
shell(
    "salmon index {opt} -p {snakemake.threads} "
    "-t {ref} -i {index_dir} &> {snakemake.log}"
)
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# The output path without its extension is passed to samtools via -T.
prefix = os.path.splitext(snakemake.output[0])[0]

shell(
    "samtools sort {snakemake.params} -@ {snakemake.threads} "
    "-o {snakemake.output[0]} -T {prefix} {snakemake.input[0]}"
)
"""Snakemake wrapper for trimming paired-end reads using cutadapt.""" __author__ = "Julian de Ruiter" __copyright__ = "Copyright 2017, Julian de Ruiter" __email__ = "*****@*****.**" __license__ = "MIT" from snakemake.shell import shell n = len(snakemake.input) assert n == 2, "Input must contain 2 (paired-end) elements." log = snakemake.log_fmt_shell(stdout=False, stderr=True) shell("cutadapt" " {snakemake.params.adapters}" " {snakemake.params.others}" " -o {snakemake.output.fastq1}" " -p {snakemake.output.fastq2}" " -j {snakemake.threads}" " {snakemake.input}" " > {snakemake.output.qc} {log}")
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Load the fastqc module and run fastqc in one subshell so that stderr of
# both steps ends up in the log file.
shell("""
(module load fastqc; \
fastqc --extract -t {snakemake.threads} -o {snakemake.params.outdir} {snakemake.input} ) 2> {snakemake.log}""")
# Imports from snakemake.shell import shell from pyfaidx import Fasta from math import log2 import os # Wrapper info wrapper_name = "star_index" wrapper_version = "0.0.4" author = "Adrien Leger" license = "MIT" shell( "echo 'Wrapper {wrapper_name} v{wrapper_version} / {author} / Licence {license}' > {snakemake.log}" ) # Shortcuts opt = snakemake.params.get("opt", "") ref = snakemake.input.ref annotation = snakemake.input.annotation index_dir = os.path.abspath(snakemake.output.index_dir) + "/" os.makedirs(index_dir, exist_ok=True) # Comput index base depending on genome length genome_len = 0 with Fasta(ref) as fa: for seq in fa: genome_len += len(seq) indexNbases = min(14, int(log2(genome_len) / 2) - 1) # Run shell command shell("STAR {opt} \
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Merge all inputs into the first output file.
shell(
    "samtools merge --threads {snakemake.threads} "
    "{snakemake.params} {snakemake.output[0]} {snakemake.input}"
)
# NOTE(review): this chunk begins mid-script; confirm that the statement
# below is not nested under a conditional defined above the visible text.
cmds.append("--dir {output_dir:q}")
if html_file:
    html_file_name = os.path.basename(html_file)
    cmds.append("--output {html_file_name:q}")

# reports
reports = [
    "alignment_report",
    "dedup_report",
    "splitting_report",
    "mbias_report",
    "nucleotide_report",
]
skip_optional_reports = answer2bool(
    snakemake.params.get("skip_optional_reports", False))

for report_name in reports:
    path = snakemake.input.get(report_name, "")
    if not path:
        # Report not supplied: explicitly disable it only when requested.
        if skip_optional_reports:
            cmds.append("--{0} 'none'".format(report_name))
        continue
    # Publish the path under the report's own name so that the
    # "{<report_name>:q}" placeholder added below can be resolved.
    locals()[report_name] = path
    cmds.append("--{0} {{{0}:q}}".format(report_name))

# log
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
cmds.append("{log}")

# run shell command:
shell(" ".join(cmds))
# the veqtl-mapper swarming mechanism is really silly...
# we need to count the number of lines in the input file
with open(snakemake.input['pheno']) as f:
    n_features = sum(1 for _line in f) - 1  # the header is not a feature

# get the number of features we want to use per run
n_features_per_job = int(
    np.ceil(n_features / int(snakemake.wildcards['j_total'])))

# do we need the final job? -- could do this modulo
# total_n is the number of features covered by all jobs except the last.
total_n = n_features_per_job * (int(snakemake.wildcards['j_total']) - 1)
final_job = snakemake.wildcards['j_cur'] == snakemake.wildcards['j_total']

if final_job and total_n >= n_features:
    # we don't need the final job. example: 608 gene and 30 jobs
    shell('touch %s' % snakemake.output[0])
else:
    # Assemble the command line piece by piece.
    cmd = 'veqtl-mapper --vcf %s --bed %s --genes %d --job-number %s --out %s' % (
        snakemake.input['geno'],
        snakemake.input['pheno'],
        n_features_per_job,
        snakemake.wildcards['j_cur'],
        snakemake.output[0],
    )
    params = dict(snakemake.params.items())
    window = params.get("window", "1000000")  # cis window default = 1Mb
    cmd = '%s --window %s' % (cmd, window)
    if 'other_settings' in params:
        cmd = '%s %s' % (cmd, snakemake.params['other_settings'])
from snakemake.shell import shell

# Appending redirection snippet that captures stderr only.
log = snakemake.get_log(stdout=False, append=True)

# Copy input to output, then emit one stderr and one stdout message, each
# followed by the redirection snippet.
shell('''
    cat {snakemake.input} > {snakemake.output}
    (>&2 echo "a stderr message") {log}
    (echo "a stdout message") {log}
''')
"""Snakemake wrapper for Salmon Index.""" __author__ = "Tessa Pierce" __copyright__ = "Copyright 2018, Tessa Pierce" __email__ = "*****@*****.**" __license__ = "MIT" from snakemake.shell import shell log = snakemake.log_fmt_shell(stdout=True, stderr=True) extra = snakemake.params.get("extra", "") shell("salmon index -t {snakemake.input} -i {snakemake.output} " " --threads {snakemake.threads} {extra} {log}")
def plus_lncrna_fasta_postprocess(tmpfiles, outfile):
    """Decompress ``tmpfiles`` with ``gunzip -c`` and write the result to ``outfile``."""
    shell(
        'gunzip -c {tmpfiles} '
        '> {outfile}'
    )
import shlex
import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# One "--filter-name ... --filter-expression ..." pair per configured
# filter.  shlex.quote() is used because a backslash does not escape a
# single quote inside a single-quoted shell word, so the previous
# "\\'" replacement broke expressions containing quotes.
filters = [
    "--filter-name {} --filter-expression {}".format(
        shlex.quote(str(name)), shlex.quote(expr))
    for name, expr in snakemake.params.filters.items()
]

# Intervals may come from the inputs or, failing that, from the params.
intervals = snakemake.input.get("intervals", "")
if not intervals:
    intervals = snakemake.params.get("intervals", "")
if intervals:
    intervals = "--intervals {}".format(intervals)

with tempfile.TemporaryDirectory() as tmpdir:
    shell("gatk --java-options '{java_opts}' VariantFiltration"
          " --variant {snakemake.input.vcf}"
          " --reference {snakemake.input.ref}"
          " {filters}"
          " {intervals}"
          " {extra}"
          " --tmp-dir {tmpdir}"
          " --output {snakemake.output.vcf}"
          " {log}")
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Four GATK steps run in sequence, with intermediates kept in the job's
# /lscratch directory: RealignerTargetCreator, IndelRealigner,
# BaseRecalibrator and PrintReads.  The first step truncates the log file,
# the later ones append to it.
shell("""
#######################
MEM="48"
module load GATK
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T RealignerTargetCreator -R {snakemake.input.ref} -nt ${{SLURM_CPUS_ON_NODE}} -known {snakemake.input.phase1} -known {snakemake.input.mills} \
    -I {snakemake.input.bam} -o /lscratch/${{SLURM_JOBID}}/realignment.intervals > {snakemake.log} 2>&1
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T IndelRealigner -R {snakemake.input.ref} -known {snakemake.input.phase1} -known {snakemake.input.mills} \
    -I {snakemake.input.bam} --targetIntervals /lscratch/${{SLURM_JOBID}}/realignment.intervals \
    -o /lscratch/${{SLURM_JOBID}}/lr.bam >>{snakemake.log} 2>&1
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T BaseRecalibrator -R {snakemake.input.ref} -knownSites {snakemake.input.phase1} -knownSites {snakemake.input.mills} \
    -I /lscratch/${{SLURM_JOBID}}/lr.bam -nct ${{SLURM_CPUS_ON_NODE}} \
    -o /lscratch/${{SLURM_JOBID}}/recalibration.matrix.txt >>{snakemake.log} 2>&1
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T PrintReads -R {snakemake.input.ref} -I /lscratch/${{SLURM_JOBID}}/lr.bam \
    -nct ${{SLURM_CPUS_ON_NODE}} \
    -o {snakemake.output.bam} -BQSR /lscratch/${{SLURM_JOBID}}/recalibration.matrix.txt >>{snakemake.log} 2>&1
######################
""")
counts = np.fromstring(inpf.read(), np.uint32).reshape((cycles, bins)) return counts.astype(np.uint64) def write_sig_dists(counts, filename): with gzip.open(filename, 'wb') as outf: outf.write(struct.pack('<III', 4, counts.shape[0], counts.shape[1])) outf.write(counts.astype(np.uint32).tostring()) counts_aggr = None for signals, taginfo, out in zip(input.signals, input.taginfo, output.taginfo): cmd = format('{BINDIR}/tailseq-polya-ruler {wildcards.tile} {signals} \ {input.score_cutoffs} {CONF[polyA_finder][signal_analysis_trigger]} \ {CONF[polyA_ruler][downhill_extension_weight]} \ {taginfo} {CONF[polyA_seeder][dist_sampling_bins]} \ {CONF[polyA_ruler][signal_resampling_gap]} \ {output.sigdists} | {BGZIP_CMD} -c > {out}', wildcards=wildcards, input=input, output=output) shell(cmd) counts_new = load_sig_dists(output.sigdists) if counts_aggr is None: counts_aggr = counts_new else: counts_aggr += counts_new write_sig_dists(counts_aggr, output.sigdists)
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Load the igvtools module and run "igvtools count" on the rule's input
# for the genome given in params.genome.
shell("""
module load igvtools
igvtools count {snakemake.input} {snakemake.output} {snakemake.params.genome}
""")
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# assumes that bams are coming in as a list
#
# Streams the alignments as SAM, removes "BI:Z:" tag fields with sed and
# converts the result back to BAM.  "\t" is expanded to a real tab by
# Python; the backslashes before ":" are doubled so that the shell still
# receives "\:" without Python flagging an invalid escape sequence.
# NOTE(review): both sed expressions are identical; confirm whether the
# second one was meant to strip a different tag.
shell("""
module load samtools
( samtools view -h {snakemake.input} | \
  sed 's/\tBI\\:Z\\:[^\t]*//' | \
  sed 's/\tBI\\:Z\\:[^\t]*//' | samtools view -bS - > {snakemake.output} ) >& {snakemake.log}
""")
# Exactly one FASTQ file, one fastqc html report and one fastqc zip
# archive are expected.
assert n_reads == 1, "Input must contain 1 fastq files. Given: %r." % [
    n_reads, snakemake.input.reads
]
assert n_fastqc_html == 1, "Input must contain 1 fastqc html reports. Given: %r." % n_fastqc_html
assert n_fastqc_zip == 1, "Input must contain 1 fastqc .zip files. Given: %r." % n_fastqc_zip

# Don't run with `--fastqc` flag
if "--fastqc" in snakemake.params.get("extra", ""):
    raise ValueError(
        "The trim_galore Snakemake wrapper cannot "
        "be run with the `--fastqc` flag. Please "
        "remove the flag from extra params. "
        "You can use the fastqc Snakemake wrapper on "
        "the input and output files instead."
    )

# Check that two output files were supplied
m = len(snakemake.output)
assert m == 2, "Output must contain 2 files. Given: %r." % m

# Check that all output files are in the same directory
out_dir = os.path.dirname(snakemake.output[0])
for file_path in snakemake.output[1:]:
    assert os.path.dirname(file_path) == out_dir, \
        "trim_galore can only output files to a single directory." \
        " Please indicate only one directory for the output files."

shell(
    "(trim_galore {snakemake.params.extra} -o {out_dir}"
    " {snakemake.input.reads}) {log}"
)
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Any "extra" files referenced by the vcfanno config are assumed to be
# available; they should be listed in the rule's input specification.
#
# The awk step rewrites spaces in column 8 (the info field), where they
# are not allowed, to "_".  This was necessary because Kaviar has spaces
# in the database names.
shell("""
module load vcfanno
vcfanno -p ${{SLURM_CPUS_ON_NODE}} {snakemake.input.config} {snakemake.input.vcf} \
    | awk -F'\t' -vOFS='\t' '{{ gsub(" ", "_", $8) ; print }}' > {snakemake.output}
""")
######## Snakemake header ######## import sys sys.path.insert( 0, "/home/athersh/miniconda3/envs/snakemake/lib/python3.5/site-packages") import pickle snakemake = pickle.loads( b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05(X\x19\x00\x00\x00dedup/sjl_kc_input_R1.bamq\x06X\x18\x00\x00\x00dedup/sjl_kc_shep_R1.bamq\x07e}q\x08(X\x03\x00\x00\x00inpq\th\x06X\x02\x00\x00\x00ipq\nh\x07X\x06\x00\x00\x00_namesq\x0b}q\x0c(h\tK\x00N\x86q\rh\nK\x01N\x86q\x0euubX\x06\x00\x00\x00outputq\x0fcsnakemake.io\nOutputFiles\nq\x10)\x81q\x11(X=\x00\x00\x00peak_out/macs2/sjl_kc_shep_R1/sjl_kc_shep_R1_peaks.narrowPeakq\x12X0\x00\x00\x00peak_out/macs2/sjl_kc_shep_R1/sjl_kc_shep_R1.bedq\x13e}q\x14(X\n\x00\x00\x00narrowPeakq\x15h\x12X\x03\x00\x00\x00bedq\x16h\x13h\x0b}q\x17(h\x15K\x00N\x86q\x18h\x16K\x01N\x86q\x19uubX\t\x00\x00\x00wildcardsq\x1acsnakemake.io\nWildcards\nq\x1b)\x81q\x1cX\x0e\x00\x00\x00sjl_kc_shep_R1q\x1da}q\x1e(X\x06\x00\x00\x00sampleq\x1fh\x1dh\x0b}q X\x06\x00\x00\x00sampleq!K\x00N\x86q"subX\x03\x00\x00\x00logq#csnakemake.io\nLog\nq$)\x81q%}q&h\x0b}q\'sbX\x07\x00\x00\x00threadsq(K\x01X\x04\x00\x00\x00ruleq)X\x05\x00\x00\x00macs2q*X\x06\x00\x00\x00paramsq+csnakemake.io\nParams\nq,)\x81q-X\x1d\x00\x00\x00peak_out/macs2/sjl_kc_shep_R1q.a}q/(X\x06\x00\x00\x00prefixq0h.h\x0b}q1h0K\x00N\x86q2subX\x06\x00\x00\x00configq3}q4X\t\x00\x00\x00resourcesq5csnakemake.io\nResources\nq6)\x81q7(K\x01K\x01e}q8(X\x06\x00\x00\x00_coresq9K\x01X\x06\x00\x00\x00_nodesq:K\x01h\x0b}q;(h9K\x00N\x86q<h:K\x01N\x86q=uubub.' ) ######## Original script ######### from snakemake.shell import shell shell('macs2 ' 'callpeak ' '-c {snakemake.input.inp} ' '-t {snakemake.input.ip} ' '--bdg --SPMR ' '-n {snakemake.wildcards.sample} ' '--outdir {snakemake.params.prefix}') shell('Rscript {snakemake.params.prefix}_model.r')
__author__ = "Behram Radmanesh"
__copyright__ = "Copyright 2016, Behram Radmanesh"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra arguments; fetched for consistency with other wrappers
# but not used by the pipeline below.
try:
    extra = snakemake.params.extra
except AttributeError:
    extra = ""

# Counts occurrences of each value in column 3 of the SAM file that starts
# with "ch", splits the value on "_", ":", " " and "-", and writes the
# fields followed by the count as a tab-separated table.
# The backslashes in the sed alternation are doubled so that the shell
# still receives "\|" without Python flagging an invalid escape sequence.
shell("""
awk '{{print $3}}' {snakemake.input.sam} \
| grep "^ch" | sort | uniq -c | sed -e 's/_\\|:\\| \\|-/\\t/g' \
| awk '{{OFS="\t";print $2,$3,$4,$1}}' > \
{snakemake.output}"""
)
__copyright__ = "Copyright 2018, Patrik Smeds"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Commands are executed with bash.
shell.executable("bash")

# Only stderr is redirected to the log.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

extra_params = snakemake.params.get("extra", "")

# Exactly one input and one output bam file are supported.
bam_input = snakemake.input[0]
if len(snakemake.input) != 1 and not isinstance(bam_input, str):
    raise ValueError("Input bam should be one bam file: " + str(bam_input) + "!")

output_file = snakemake.output[0]
if len(snakemake.output) != 1 and not isinstance(output_file, str):
    raise ValueError("Output should be one bam file: " + str(output_file) + "!")

shell(
    "fgbio SetMateInformation -i {bam_input} -o {output_file}"
    " {extra_params} {log}"
)
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Requires SDST, which provides the "seqtool" command:
#   "easy_install git+https://github.com/seandavi/SDST.git"
#
# input:  vcf
# input:  bam   the RNA-seq bam file
# output: vcf
#
# stderr of the command is written to the log file.
shell("""
seqtool vcf rnacount -f {snakemake.input.vcf} -o {snakemake.output.vcf} {snakemake.input.bam} 2> {snakemake.log}
""")
__author__ = "Ali Ghaffaari"
__copyright__ = "Copyright 2018, Ali Ghaffaari"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

log = snakemake.log_fmt_shell()

# Positional arguments: reference, first read file, second read file.
shell(
    "(wgsim {snakemake.params} "
    "{snakemake.input.ref} {snakemake.output.read1} {snakemake.output.read2}) "
    "{log}"
)
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Builds two interval lists next to the input BAM (its header followed by
# the BED-like BAIT/TARGET files with the start column shifted by one),
# runs Picard CalculateHsMetrics on them and removes the lists afterwards.
# stderr of the Picard run goes to the log file.
shell("""
module load samtools/1.3 picard/1.139
## Generate intervals for hsmetrics
cat <(samtools view -H {snakemake.input} ) <(gawk '{{print $1 "\t" $2+1 "\t" $3 "\t+\tinterval_" NR}}' {snakemake.params.BAIT_INTERVALS} )> {snakemake.input}.BAIT_INTERVALS
cat <(samtools view -H {snakemake.input} ) <(gawk '{{print $1 "\t" $2+1 "\t" $3 "\t+\tinterval_" NR}}' {snakemake.params.TARGET_INTERVALS} )> {snakemake.input}.TARGET_INTERVALS
## CalculateHsMetrics
MEM=$((SLURM_MEM_PER_NODE / 1024))
#MEM="22"
java -Xmx${{MEM}}g -jar $PICARDJARPATH/picard.jar \
    CalculateHsMetrics \
    BAIT_INTERVALS={snakemake.input}.BAIT_INTERVALS \
    TARGET_INTERVALS={snakemake.input}.TARGET_INTERVALS \
    I={snakemake.input} \
    O={snakemake.output} \
    AS=true \
    VALIDATION_STRINGENCY=SILENT \
    2> {snakemake.log}
rm -f {snakemake.input}.BAIT_INTERVALS {snakemake.input}.TARGET_INTERVALS
""")
__author__ = "Per Unneberg"
__copyright__ = "Copyright 2020, Per Unneberg"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# Optional extra arguments; empty when the rule defines no "options" param.
options = snakemake.params.get("options", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Results are written below results/genecovr/<analysis wildcard>.
analysis = snakemake.wildcards.analysis
outdir = os.path.join("results", "genecovr", analysis)

# Use the defaulted `options` value: referencing
# snakemake.params.options directly fails when the param is absent.
shell("genecovr -p {snakemake.threads} "
      "{options} "
      "-d {outdir} "
      "{snakemake.input.csv} "
      "{log}")
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2016, Sebastian Kurscheid"
__email__ = "*****@*****.**"
__license__ = "MIT"
__date__ = "2016-09-07"
__version__ = 0.1

from snakemake.shell import shell

# Convert the input BAM into two FASTQ files (-fq and -fq2).
shell("""
bedtools bamtofastq -i {snakemake.input.bam_file} \
    -fq {snakemake.output.read1} \
    -fq2 {snakemake.output.read2}
""")
log = snakemake.log_fmt_shell(stdout=False, stderr=True)


def basename_without_ext(file_path):
    """Return the file name of ``file_path`` with its extension removed.

    One dot-separated suffix is dropped, or two when the name ends in
    ``.gz``.
    """
    base = path.basename(file_path)
    n_suffixes = 2 if base.endswith(".gz") else 1
    return ".".join(base.split(".")[:-n_suffixes])


# fastqc runs in a fresh temporary directory because several jobs sharing
# one fastqc output directory can race with each other.
with TemporaryDirectory() as tempdir:
    shell(
        "fastqc {snakemake.params} --quiet --outdir {tempdir} "
        "{snakemake.input[0]} {log}"
    )

    # Locate the reports fastqc produced and move them to the requested
    # output paths.
    output_base = basename_without_ext(snakemake.input[0])
    html_path = path.join(tempdir, output_base + "_fastqc.html")
    zip_path = path.join(tempdir, output_base + "_fastqc.zip")

    if html_path != snakemake.output.html:
        shell("mv {html_path} {snakemake.output.html}")

    if zip_path != snakemake.output.zip:
        shell("mv {zip_path} {snakemake.output.zip}")
__author__ = "Ryan Dale"
__copyright__ = "Copyright 2016, Ryan Dale"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# Optional extra command-line arguments; empty when the rule defines none.
extra = getattr(snakemake.params, "extra", "")

# Send stdout and stderr to the rule's log file when one is configured.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# Directory of the first output file (not used by the command below).
outdir = os.path.dirname(snakemake.output[0])

shell(
    "kallisto quant --index {snakemake.input.index} -o {snakemake.output} "
    "--threads {snakemake.threads} {extra} {snakemake.input.fastq} {log} "
)
__author__ = "Ali Ghaffaari"
__copyright__ = "Copyright 2018, Ali Ghaffaari"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# stdout carries the simulated reads, so it is excluded from the log.
log = snakemake.log_fmt_shell(stdout=False)

shell(
    "(vg sim {snakemake.params} "
    "--xg-name {snakemake.input.xg} --threads {snakemake.threads} "
    "> {snakemake.output.reads}) {log}"
)
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Load the picard module and run MarkDuplicates with an 8 GB Java heap;
# stderr goes to the log file.
shell("""
MEM="8"
module load picard
java -Xmx${{MEM}}g -jar $PICARDJARPATH/picard.jar MarkDuplicates VALIDATION_STRINGENCY=SILENT I={snakemake.input} O={snakemake.output} AS=true 2> {snakemake.log}""")
"""Snakemake wrapper for running samtools depth."""

__author__ = "Dayne L Filer"
__copyright__ = "Copyright 2020, Dayne L Filer"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Extra command-line arguments supplied through params (may be empty).
params = snakemake.params.get("extra", "")

# Optional bed input: becomes a "-b <file>" argument only when provided.
bed_file = snakemake.input.get("bed", "")
bed = "-b " + bed_file if bed_file else bed_file

depth_command = (
    "samtools depth {params} {bed} "
    "-o {snakemake.output[0]} {snakemake.input.bams}"
)
shell(depth_command)
# specify output: vcf=
#
# Annotate the input VCF with VEP in offline/cache mode.
#
# The script copies the VEP cache and reference FASTA for the configured
# version/assembly to node-local scratch under /lscratch/<job id>, then runs
# variant_effect_predictor.pl against that copy.
#
# Notes:
# - `buffer_size` is not defined in this snippet.
#   NOTE(review): it is presumably defined earlier in the file; confirm.
# - CACHE_DIR is exported twice; the second export (using SLURM_JOB_ID)
#   overrides the first (using SLURM_JOBID).
# - Doubled braces keep ${VAR} literal for the shell; single-brace fields are
#   filled in by shell().
# - Fix: the sift option used an em dash ("—sift") instead of "--sift", so it
#   was not passed to VEP as an option.
shell("""
echo {buffer_size}
VEP_VERSION="83"
VEP_ASSEMBLY="GRCh37"
module load VEP/${{VEP_VERSION}}
module load samtools
mkdir -p /lscratch/$SLURM_JOBID/homo_sapiens/${{VEP_VERSION}}_${{VEP_ASSEMBLY}}
cp -r /fdb/VEP/${{VEP_VERSION}}/cache/homo_sapiens/${{VEP_VERSION}}_${{VEP_ASSEMBLY}} /lscratch/${{SLURM_JOBID}}/homo_sapiens
cp -r /fdb/VEP/${{VEP_VERSION}}/cache/${{VEP_ASSEMBLY}}.fa* /lscratch/${{SLURM_JOBID}}/
export CACHE_DIR=/lscratch/${{SLURM_JOBID}}/
export CADD_DIR=/data/CCRBioinfo/public/CADD
export EXAC_DIR=/fdb/exac/release0.3
export CACHE_DIR=/lscratch/${{SLURM_JOB_ID}}
variant_effect_predictor.pl \
    -i {snakemake.input.vcf} --offline --cache \
    --dir_cache $CACHE_DIR --fasta $CACHE_DIR/${{VEP_ASSEMBLY}}.fa \
    --output {snakemake.output.vcf} --fork ${{SLURM_CPUS_ON_NODE}} \
    --sift s --polyphen s --vcf --pick \
    --symbol --buffer_size {buffer_size} --biotype --hgvs --assembly ${{VEP_ASSEMBLY}} \
    --gene_phenotype --gmaf --check_existing \
    --pubmed --force_overwrite \
    --maf_1kg --maf_esp --regulatory --domains --numbers \
    --uniprot --xref_refseq \
    --plugin CADD,$CADD_DIR/whole_genome_SNVs.tsv.gz \
    --plugin ExAC,$EXAC_DIR/ExAC.r0.3.sites.vep.vcf.gz \
    --plugin CSN,1 \
    --plugin Carol
""")
__author__ = "Patrik Smeds"
__copyright__ = "Copyright 2021, Patrik Smeds"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell
from snakemake_wrapper_utils.bcftools import get_bcftools_opts

# Options derived from the job by the shared wrapper utility.
bcftools_opts = get_bcftools_opts(snakemake)
# Shell redirection fragment: log stderr only.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# The command writes exactly one file, so reject rules declaring more.
if len(snakemake.output) > 1:
    raise Exception("Only one output file expected, got: " + str(len(snakemake.output)))

# Named `filter_opts` rather than `filter` to avoid shadowing the builtin.
filter_opts = snakemake.params.get("filter", "")
# Free-form extra arguments.  These were previously read but never placed in
# the command, so anything set in `params.extra` was silently ignored.
extra = snakemake.params.get("extra", "")

shell(
    "bcftools filter {filter_opts} {extra} {snakemake.input[0]} "
    "{bcftools_opts} "
    "-o {snakemake.output[0]} "
    "{log}"
)
__author__ = "Behram Radmanesh"
__copyright__ = "Copyright 2016, Behram Radmanesh"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra arguments; empty when params has no `extra`.
extra = snakemake.params.extra if hasattr(snakemake.params, "extra") else ""

# Redirect stdout and stderr into the log file when one is configured.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# Run the R script on the BAM with key=value settings.
# `extra` and `log` were computed above but never reached the command, so
# extra arguments were dropped and nothing was written to the log file.
# Both are appended here; with no `extra` and no log configured the command
# is unchanged apart from trailing whitespace.
shell(
    "Rscript {snakemake.input.dupRScript} "
    "{snakemake.input.dupBAM} "
    "gtf={snakemake.input.GTF} "
    "stranded={snakemake.params.stranded} "
    "paired={snakemake.params.paired} "
    "outfile={snakemake.output[0]} "
    "threads={snakemake.threads} "
    "{extra} "
    "{log}"
)
__author__ = "William Rowell"
__copyright__ = "Copyright 2020, William Rowell"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Shell redirection fragment covering both stdout and stderr.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
# Extra command-line arguments supplied through params (may be empty).
extra = snakemake.params.get("extra", "")

# Phase one chromosome.  The command runs in a subshell so the log
# redirection applies to the whole invocation.
phase_command = """
    (whatshap phase \
        {extra} \
        --chromosome {snakemake.wildcards.chromosome} \
        --output {snakemake.output} \
        --reference {snakemake.input.reference} \
        {snakemake.input.vcf} \
        {snakemake.input.phaseinput}) {log}
    """

shell(phase_command)
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2016, Sebastian Kurscheid"
__email__ = "*****@*****.**"
__license__ = "MIT"
__date__ = "2016-08-08"
__version__ = 0.2

from snakemake.shell import shell

# Quantify two read files against the index `ki`, writing into the rule's
# output directory with the number of bootstrap samples taken from params.
quant_command = """
    kallisto quant --index={snakemake.input.ki} \
                   --output-dir={snakemake.output} \
                   --threads={snakemake.threads} \
                   --bootstrap-samples={snakemake.params.bootstraps} \
                   {snakemake.input.read1} {snakemake.input.read2}
    """

shell(quant_command)
######## Snakemake header ########
# This header rebuilds the `snakemake` job object from a pickled payload.
# The payload embeds this job's concrete values: rule "macs2", sample
# wildcard "sjl_cl8_cp190_R1", its input BAMs, output paths and params.
# NOTE(review): the header looks machine-generated for a single job run, and
# the site-packages path is specific to one user's environment — confirm
# before reusing this file elsewhere.
import sys
sys.path.insert(
    0, "/home/athersh/miniconda3/envs/snakemake/lib/python3.5/site-packages")
import pickle
# NOTE(review): pickle.loads executes arbitrary code if the payload is
# tampered with; only acceptable because the bytes are embedded in this file.
snakemake = pickle.loads(
    b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\t\x00\x00\x00resourcesq\x03csnakemake.io\nResources\nq\x04)\x81q\x05(K\x01K\x01e}q\x06(X\x06\x00\x00\x00_namesq\x07}q\x08(X\x06\x00\x00\x00_nodesq\tK\x00N\x86q\nX\x06\x00\x00\x00_coresq\x0bK\x01N\x86q\x0cuh\tK\x01h\x0bK\x01ubX\t\x00\x00\x00wildcardsq\rcsnakemake.io\nWildcards\nq\x0e)\x81q\x0fX\x10\x00\x00\x00sjl_cl8_cp190_R1q\x10a}q\x11(h\x07}q\x12X\x06\x00\x00\x00sampleq\x13K\x00N\x86q\x14sX\x06\x00\x00\x00sampleq\x15h\x10ubX\x06\x00\x00\x00paramsq\x16csnakemake.io\nParams\nq\x17)\x81q\x18(X\x1f\x00\x00\x00peak_out/macs2/sjl_cl8_cp190_R1q\x19h\x10e}q\x1a(h\x07}q\x1b(X\x06\x00\x00\x00prefixq\x1cK\x00N\x86q\x1dX\x0e\x00\x00\x00wrapper_sampleq\x1eK\x01N\x86q\x1fuh\x1ch\x19h\x1eh\x10ubX\x04\x00\x00\x00ruleq X\x05\x00\x00\x00macs2q!X\x07\x00\x00\x00threadsq"K\x01X\x05\x00\x00\x00inputq#csnakemake.io\nInputFiles\nq$)\x81q%(X\x1a\x00\x00\x00dedup/sjl_cl8_input_R1.bamq&X\x1a\x00\x00\x00dedup/sjl_cl8_cp190_R1.bamq\'e}q((h\x07}q)(X\x03\x00\x00\x00inpq*K\x00N\x86q+X\x02\x00\x00\x00ipq,K\x01N\x86q-uh*h&h,h\'ubX\x06\x00\x00\x00outputq.csnakemake.io\nOutputFiles\nq/)\x81q0(XA\x00\x00\x00peak_out/macs2/sjl_cl8_cp190_R1/sjl_cl8_cp190_R1_peaks.narrowPeakq1X4\x00\x00\x00peak_out/macs2/sjl_cl8_cp190_R1/sjl_cl8_cp190_R1.bedq2e}q3(h\x07}q4(X\n\x00\x00\x00narrowPeakq5K\x00N\x86q6X\x03\x00\x00\x00bedq7K\x01N\x86q8uh5h1h7h2ubX\x03\x00\x00\x00logq9csnakemake.io\nLog\nq:)\x81q;}q<h\x07}q=sbX\x06\x00\x00\x00configq>}q?ub.'
)
######## Original script #########
from snakemake.shell import shell
# Call peaks with the `inp` input as control (-c) and the `ip` input as
# treatment (-t); results are named after the sample wildcard and written
# under the params prefix directory.
shell('macs2 '
      'callpeak '
      '-c {snakemake.input.inp} '
      '-t {snakemake.input.ip} '
      '--bdg --SPMR '
      '-n {snakemake.wildcards.sample} '
      '--outdir {snakemake.params.prefix}')
# Run the <sample>_model.r script from the prefix directory and capture its
# stdout as <sample>_model.pdf.
shell(
    'Rscript {snakemake.params.prefix}/{snakemake.params.wrapper_sample}_model.r > {snakemake.params.prefix}/{snakemake.params.wrapper_sample}_model.pdf'
)
# Expose the narrowPeak output under the .bed output name via a symlink.
# NOTE(review): the link target is passed exactly as the output path; if that
# path is relative to the working directory, a symlink created inside the
# output directory may dangle — confirm.
shell("ln -sf {snakemake.output.narrowPeak} {snakemake.output.bed}")