Example #1
0
def external_script(_command):
    """Run ``_command`` via ``shell()`` with selected caller variables exported as JSON.

    The variables named in ``VARS_TO_PASS`` are read from the caller's local
    namespace and dumped as JSON into a temporary file. The file's path is
    published in the environment variable named by ``PARAMETER_PASSING_ENVVAR``
    while the command runs and removed from the environment afterwards.

    Parameters
    ----------
    _command
        Command handed unchanged to ``shell()``.

    Raises
    ------
    KeyError
        If a name listed in ``VARS_TO_PASS`` is missing from the caller's locals.
    """
    import inspect, json, tempfile

    # Whitespace-separated names of the caller variables to export.
    VARS_TO_PASS = '******'.split()

    # Namespaces of the function that called external_script().
    callerlocal = inspect.currentframe().f_back.f_locals
    callerglobal = inspect.currentframe().f_back.f_globals
    packed = {}
    for var in VARS_TO_PASS:
        # Integers are stored as-is; every other value must provide .allitems().
        # NOTE(review): presumably these are Snakemake named lists — confirm.
        if isinstance(callerlocal[var], int):
            packed[var] = callerlocal[var]
        else:
            packed[var] = list(callerlocal[var].allitems())

    # The temporary file exists only for the duration of this `with` block.
    with tempfile.NamedTemporaryFile(mode='wt') as tmpfile:
        json.dump(packed, tmpfile)
        # Flush so the JSON is on disk before the command is started.
        tmpfile.flush()
        os.environ[PARAMETER_PASSING_ENVVAR] = tmpfile.name

        try:
            # NOTE(review): presumably copies the caller's names into this frame
            # so shell() can resolve {placeholders} in _command. Writing through
            # locals() is implementation-dependent — confirm on the target
            # Python version.
            locals().update(callerglobal)
            locals().update(callerlocal)
            shell(_command)
        finally:
            # Always drop the variable, even when the command fails.
            del os.environ[PARAMETER_PASSING_ENVVAR]
Example #2
0
def get_cuda_arch():
    '''
    Report which supported NVIDIA GPU architectures are present on this host.

    The output of ``lspci -nn`` is searched for the PCI device IDs listed in
    the ``<ARCH>_DEVICES`` files (one tab-separated ``model<TAB>pciid`` entry
    per line, blank lines ignored). Every architecture with at least one
    matching device is reported, so cubins can be built for all of them.

    If your GPU is not detected correctly, update the *_DEVICES files by
    referring to https://pci-ids.ucw.cz/v2.2/pci.ids and make a pull request!

    Returns a list of architecture names in the order
    MAXWELL, KEPLER, FERMI; the list is empty when nothing matched.
    '''
    lspci_output = str(shell('lspci -nn', read=True))
    supported_archs = ['MAXWELL', 'KEPLER', 'FERMI']
    detected = set()
    for arch in supported_archs:
        with open(arch + '_DEVICES', 'r') as device_file:
            for entry in device_file:
                entry = entry.strip()
                if not entry:
                    continue
                _model, pciid = entry.split('\t')
                # lspci prints IDs without the "0x" prefix used in the lists.
                if pciid.replace('0x', '') in lspci_output:
                    detected.add(arch)
    # Filtering the ordered list yields the same ordering as sorting by index.
    return [arch for arch in supported_archs if arch in detected]
Example #3
0
def fasta_postprocess(origfn, newfn):
    """
    Concatenate the FASTA files of a downloaded tarball into one gzipped file.

    The fasta from UCSC comes as a tarball of fastas. So we extract them all to
    a temp directory (``<tarball>.tmp``) and then cat them all together, in
    sorted filename order, into the final fa.gz file.

    Parameters
    ----------
    origfn : list
        Single-element list holding the path of the tarball.

    newfn : str
        Path of the gzip-compressed FASTA to write.

    Raises
    ------
    AssertionError
        If `origfn` is not a list with exactly one element.
    """
    import os
    import shutil

    assert (
        (isinstance(origfn, list)) and (len(origfn) == 1)
    ), 'unexpected input: %s' % origfn
    origfn = origfn[0]
    tmpdir = origfn + '.tmp'
    os.makedirs(tmpdir, exist_ok=True)
    try:
        # NOTE: extractall() trusts the member paths stored in the archive;
        # only use this on tarballs from a trusted source.
        with tarfile.open(origfn) as t:
            t.extractall(tmpdir)
        with gzip.open(newfn, 'wt') as fout:
            for fa in sorted(glob.glob(tmpdir + '/*.fa')):
                print(fa)
                # Stream each file instead of loading a whole chromosome
                # into memory, and close the handle deterministically.
                with open(fa) as fin:
                    shutil.copyfileobj(fin, fout)
    finally:
        # Remove the extraction directory even when something above failed.
        shutil.rmtree(tmpdir)
Example #4
0
def rscript(string, scriptname, log=None):
    """
    Write an R script to disk and execute it with ``Rscript``.

    Parameters
    ----------
    string : str
        Complete R source (an already filled-in template).

    scriptname : str
        Path the script is written to; the same path is passed to Rscript.

    log : str
        Optional file receiving both stdout and stderr of the run. When it is
        None (or otherwise falsy) the output is not redirected.
    """
    with open(scriptname, 'w') as script_file:
        script_file.write(string)
    # The names `scriptname` and `_log` are referenced by the command
    # template below, so they must keep exactly these names.
    _log = '> {0} 2>&1'.format(log) if log else ""
    shell('Rscript {scriptname} {_log}')
Example #5
0
from snakemake.shell import shell
from os import path

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

def inputCmd(subsets):
    """Build, print and return the ``-a <first> -b <second>`` argument string.

    Only the first two entries of `subsets` are used.
    """
    first, second = subsets[0], subsets[1]
    input_cmd = "-a {} -b {}".format(first, second)
    print(input_cmd)
    return input_cmd

# Collect the rule parameters under short names; the command template below
# refers to them (and to `log`) by exactly these names.
quant_subsets = snakemake.params.quant_subsets
input_cmd = inputCmd(quant_subsets)
output_path = snakemake.params.output_path
min_cov = snakemake.params.min_cov
min_sam = snakemake.params.min_sam

# Run whippet-delta.jl with the -a/-b inputs, output path and the two
# thresholds, followed by the log redirection.
shell("whippet-delta.jl "
"{input_cmd} "
"-o {output_path} "
"-r {min_cov} "
"-s {min_sam} "
"{log}")
Example #6
0
# list to a string, here we detect single-end reads by checking if input.fastq
# is a string.
# A single path means single-end reads (-U); otherwise exactly two files are
# required and passed as mates (-1/-2).
if isinstance(snakemake.input.fastq, str):
    fastqs = '-U {0} '.format(snakemake.input.fastq)
else:
    assert len(snakemake.input.fastq) == 2
    fastqs = '-1 {0} -2 {1} '.format(*snakemake.input.fastq)

# Figure out the prefix based on the input index, which has the format
#
#   prefix.N.ht2
#
# where N is [1-8]. We strip off the .N.ht2 and ensure the remaining prefixes
# are the same.
#
prefixes = list(set(map(lambda x: '.'.join(x.split('.')[:-2]), snakemake.input.index)))
assert len(prefixes) == 1, 'Multiple prefixes detected from "{0}"'.format(snakemake.input.index)
prefix = prefixes[0]

# Align to an intermediate SAM file named "<output>.sam".
# NOTE(review): `extra` and `log` are not assigned in this part of the
# script — presumably defined earlier; confirm.
shell(
    "hisat2 "
    "-x {prefix} "
    "{fastqs} "
    "--threads {snakemake.threads} "
    "{extra} "
    "-S {snakemake.output}.sam "
    "{log}"
)

# Convert the SAM to BAM at the real output path, then delete the SAM.
shell("samtools view -Sb {snakemake.output}.sam > {snakemake.output} && rm {snakemake.output}.sam")
Example #7
0
__author__ = "Jusitn fear"
__copyright__ = "Copyright 2016, Justin Fear"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# Optional extra arguments; empty when the rule defines no `extra` param.
extra = getattr(snakemake.params, "extra", "")

# Send stdout and stderr to the rule's log file when one is declared.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# `extra` and `log` are referenced by name in the command template.
shell(
    "picard MarkDuplicates "
    "I={snakemake.input.bam} "
    "O={snakemake.output.bam} "
    "{extra} "
    "M={snakemake.output.metrics} "
    "{log}"
)
Example #8
0
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2016, Sebastian Kurscheid"
__email__ = "*****@*****.**"
__license__ = "MIT"
__date__ = "2016-09-14"
__version__ = 0.1

from snakemake.shell import shell

# Call peaks with the macs2 binary found in params.macs2_dir: ChIP (-t)
# against input control (-c), named after the sample wildcard, with model
# building disabled and a fixed extension size. Each trailing backslash joins
# the next line, so the shell receives a single command line.
shell("""
            {snakemake.params.macs2_dir}/macs2 callpeak -B \
                                              -t {snakemake.input.chip} \
                                              -c {snakemake.input.input} \
                                              -n {snakemake.wildcards.sample} \
                                              -f {snakemake.params.format} \
                                              --nomodel \
                                              --extsize {snakemake.params.extsize} \
                                              --outdir {snakemake.output}
      """)
Example #9
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Index the first input with samtools, passing the rule params through and
# writing the index to the first output.
shell("samtools index {snakemake.params} {snakemake.input[0]} {snakemake.output[0]}")
# NOTE(review): from here on the code uses `os`, `tempfile` and `log`, none of
# which are imported/assigned in the visible lines — this looks like a
# separate wrapper whose header is missing; confirm.
log_dir = os.path.dirname(snakemake.log[0])
output_dir = os.path.dirname(snakemake.output[0])

# sample basename
basename = os.path.splitext(os.path.basename(snakemake.input.bam[0]))[0]

# Space-separated task indices "0 1 ... threads-1", one per parallel job.
split_inputs = " ".join(str(x) for x in range(0, int(snakemake.threads)))

# All intermediate files live in a temporary directory that is removed when
# the block exits. The command has three newline-separated stages:
# make_examples (run once per task index through GNU parallel; `{{}}` is the
# escaped `{}` placeholder of parallel), dv_call_variants.py, and
# postprocess_variants writing the VCF and gVCF outputs.
with tempfile.TemporaryDirectory() as tmp_dir:
    shell(
        "(BIN_DIR=$(ls -d $CONDA_DEFAULT_ENV/share/deepvariant*/binaries/Deepvariant/*/DeepVariant*) \n"
        "parallel --eta --halt 2 --joblog {log_dir}/log --res {log_dir} "
        "python $BIN_DIR/make_examples.zip "
        "--mode calling --ref {snakemake.input.ref} --reads {snakemake.input.bam} "
        "--examples {tmp_dir}/{basename}.tfrecord@{snakemake.threads}.gz "
        "--gvcf {tmp_dir}/{basename}.gvcf.tfrecord@{snakemake.threads}.gz "
        "--task {{}} "
        "::: {split_inputs} \n"
        "dv_call_variants.py "
        "--cores {snakemake.threads} "
        "--outfile {tmp_dir}/{basename}.tmp "
        "--sample {basename} "
        "--examples {tmp_dir} "
        "--model {snakemake.params.model} \n"
        "python $BIN_DIR/postprocess_variants.zip "
        "--ref {snakemake.input.ref} "
        "--infile {tmp_dir}/{basename}.tmp "
        "--outfile {snakemake.output.vcf} "
        "--nonvariant_site_tfrecord_path {tmp_dir}/{basename}.gvcf.tfrecord@{snakemake.threads}.gz "
        "--gvcf_outfile {snakemake.output.gvcf} ) {log}")
Example #11
0
from snakemake.shell import shell
# Three log-redirection snippets obtained from snakemake.get_log() with
# different flags.
# NOTE(review): judging by the echo texts below, the default form presumably
# truncates the log while the append=True forms append — confirm against the
# get_log implementation.
initial_log = snakemake.get_log()
stdout_log = snakemake.get_log(stderr=False, append=True)
stderr_log = snakemake.get_log(stdout=False, append=True)
# Copy input to output, then emit messages through each redirection.
shell('''
      cat {snakemake.input} > {snakemake.output}
      echo "should not appear since next line truncates" {initial_log}
      echo "first line" {initial_log}
      (>&2 echo "a stderr message") {stderr_log}
      (echo "a stdout message") {stdout_log}
      ''')
Example #12
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Shell redirection sending both stdout and stderr to the rule's log.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Samtools takes additional threads through its option -@
# One thread for samtools index itself
# Other threads are *additional* threads passed to the '-@' argument
threads = "" if snakemake.threads <= 1 else " -@ {} ".format(
    snakemake.threads - 1)

# Index the first input, writing the index to the first output.
shell(
    "samtools index {threads} {snakemake.params} {snakemake.input[0]} {snakemake.output[0]} {log}"
)
Example #13
0
__author__ = "David Laehnemann, Victoria Sack"
__copyright__ = "Copyright 2018, David Laehnemann, Victoria Sack"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# First output path without its extension.
# NOTE(review): `prefix` is not referenced by the command below — possibly a
# leftover; confirm before removing.
prefix = os.path.splitext(snakemake.output[0])[0]

# Convert the first input BAM to FASTQ, redirecting stdout to the first output.
shell("samtools bam2fq {snakemake.params} "
      " -@ {snakemake.threads} "
      " {snakemake.input[0]}"
      " >{snakemake.output[0]} ")
Example #14
0
__author__ = "Adrien Leger"
__copyright__ = "Copyright 2019, Adrien Leger"
__email__ = "*****@*****.**"
__license__ = "MIT"
__version__ = "0.0.1"

# Imports
from snakemake.shell import shell

# Shortcuts
# Optional extra arguments (empty when the rule defines no `opt` param) and
# the input/output BAM paths; the command template refers to these names.
opt = snakemake.params.get("opt", "")
bam_input = snakemake.input.bam
bam_output = snakemake.output.bam
# Redirect stdout and stderr to the rule's log. Unlike a hard-coded
# "&> {snakemake.log}", this expands to nothing when the rule declares no log,
# so the command stays valid in that case.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Run shell command
shell(
    "pyBioTools Alignment Split {opt} -i {bam_input} -l {bam_output} --verbose {log}"
)
Example #15
0
"""Snakemake wrapper for ProSolo single-cell-bulk calling"""

__author__ = "David Lähnemann"
__copyright__ = "Copyright 2020, David Lähnemann"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Shell redirection sending both stdout and stderr to the rule's log.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Run prosolo single-cell-bulk with indels omitted. The command is wrapped in
# a subshell "( ... )" so the redirection in `log` applies to all of it.
# Positional arguments, in order: single-cell input, bulk input, reference.
shell(
    "( prosolo single-cell-bulk "
    "--omit-indels "
    " {snakemake.params.extra} "
    "--candidates {snakemake.input.candidates} "
    "--output {snakemake.output} "
    "{snakemake.input.single_cell} "
    "{snakemake.input.bulk} "
    "{snakemake.input.ref} ) "
    "{log} "
)
Example #16
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Pipe uncompressed BCF from samtools mpileup into bcftools call (-m, -v),
# writing the calls to the first output. stderr of the whole pipeline goes to
# the rule's log.
shell(
    "(samtools mpileup {snakemake.params.mpileup} {snakemake.input.samples} "
    "--fasta-ref {snakemake.input.ref} --BCF --uncompressed | "
    "bcftools call -m {snakemake.params.call} -o {snakemake.output[0]} -v -) 2> {snakemake.log}")
Example #17
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

# Shell redirection sending both stdout and stderr to the rule's log.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
# Accept one BAM or several: normalise to a list, then turn every path into
# an "INPUT=<path>" argument.
bams = snakemake.input
if isinstance(bams, str):
    bams = [bams]
bams = list(map("INPUT={}".format, bams))

# NOTE(review): `{bams}` interpolates a Python list; this relies on shell()
# rendering sequences as space-separated items — confirm.
shell("picard MarkDuplicates "  # Tool and its subcommand
      "{java_opts} "  # Automatic java option
      "{extra} "  # User defined parameters
      "{bams} "  # Input bam(s)
      "OUTPUT={snakemake.output.bam} "  # Output bam
      "METRICS_FILE={snakemake.output.metrics} "  # Output metrics
      "{log}"  # Logging
      )
Example #18
0

# Log redirection and the target description (a mapping with a 'catg' key
# selecting the branch below).
log=snakemake.log_fmt_shell(stdout=True,stderr=True)
target=snakemake.params.target


# Expression target: extract the region ref:start-end from the BAM, sort it
# into the first output and index the result.
if target['catg']=="expr":
	ref=target['ref']
	start=target['start']
	end=target['end']
	bamfile=snakemake.input.bam

	# NOTE(review): tempfile.mktemp() only returns a name (deprecated, and
	# open to races); the file is also never deleted in the code shown here.
	# Consider tempfile.NamedTemporaryFile / TemporaryDirectory.
	# NOTE(review): `log` is assigned above but not used in this command.
	tmpfile=tempfile.mktemp()
	shell(
		"samtools view -b {bamfile} {ref}:{start}-{end} > {tmpfile} ;"
		"samtools sort {tmpfile} >  {snakemake.output[0]} ;"	
		"samtools index {snakemake.output[0]} "
	)


if target['catg']=="fusion":
	ref1=target['r1']['ref']
	start1=target['r1']['start']
	end1=target['r1']['end']
	bamfile=snakemake.input.chimeric

	ref2=target['r2']['ref']
	start2=target['r2']['start']
	end2=target['r2']['end']

	tmpfile=tempfile.mktemp()
Example #19
0
# Normalise `fastq` to a list and allow at most two read files.
# NOTE(review): `fastq`, `fragment_length`, `standard_deviation`, `extra`,
# `log` and the exception classes are not defined in this part of the
# script — presumably set earlier; confirm.
fastq = [fastq] if isinstance(fastq, str) else fastq
if len(fastq) > 2:
    raise RuleInputException(
        'Your sequencing read should be single-read or paired-end.')

# Anything other than exactly two files is treated as single-end, which
# additionally requires fragment length and standard deviation.
single_flag = '' if len(fastq) == 2 else '--single'
if single_flag and (fragment_length == '' or standard_deviation == ''):
    raise RuleParameterException(
        'Please provide fragment length(-l) and standard deviation(-s) parameter for single-end reads.'
    )
fastq = ' '.join(fastq)

index = snakemake.input.index
threads = snakemake.threads

# kallisto writes into a directory: use the one containing the first output.
output_directory = path.dirname(snakemake.output[0])

# Execute shell command.
shell("("
      "kallisto quant "
      "-i {index} "
      "-o {output_directory} "
      "-t {threads} "
      "{fragment_length} "
      "{standard_deviation} "
      "{single_flag} "
      "{extra} "
      "{fastq}"
      ")"
      "{log}")
Example #20
0
__author__ = "Max Cummins"
__copyright__ = "Copyright 2021, Max Cummins"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell
from os import path

# Only stderr goes to the log: stdout carries the statistics and is
# redirected to the output file in the command itself.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

shell("assembly-stats"
      " {snakemake.params.extra}"
      " {snakemake.input.assembly}"
      " > {snakemake.output.assembly_stats}"
      " {log}")
Example #21
0
__author__ = "Jack Zhu"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Load the STAR module and run STAR-Fusion on Chimeric.out.junction (a
# relative path, so it is resolved in the working directory). The doubled
# braces in ${{SLURM_JOBID}} are format escapes: the shell sees ${SLURM_JOBID}.
shell("""
    module load STAR/2.5.1b
    STAR-Fusion --genome_lib_dir {snakemake.params.genome_lib_dir} \
                 -J Chimeric.out.junction \
                 --output_dir {snakemake.params.star_fusion_outdir} \
                 --tmpdir /lscratch/${{SLURM_JOBID}}
""")

Example #22
0
# Extract parameters.
extra = snakemake.params.get('extra', '')

# Extract required inputs.
input_file = snakemake.input[0]
input_command = '-i %s' % input_file

# Extract optional inputs.
output_file = snakemake.output[0]
output_command = '-o %s' % output_file

# Extract user parameters.
user_parameters = []
user_parameters.append(optionify_params('gsize', '--gsize'))
user_parameters.append(optionify_params('tsize', '--tsize'))
user_parameters.append(optionify_params('pvalue', '--pvalue'))
user_parameters.append(optionify_params('keep_dup', '--keep_dup'))
user_parameters.append(optionify_params('verbose', '--verbose'))
# Keep only the options that were actually produced. The previous condition
# (`not p != ''`) was inverted: it kept the empty strings and silently dropped
# every real option.
user_parameters = ' '.join([p for p in user_parameters if p])

# Execute shell command.
# The subshell "( ... )" makes the redirection in `log` cover the whole command.
shell("("
      "macs2 filterdup "
      "{input_command} "
      "{output_command} "
      "{user_parameters} "
      "{extra} "
      ") "
      "{log}")
Example #23
0
# Optional extra arguments; empty when the rule defines no `extra` param.
extra = getattr(snakemake.params, "extra", "")

# Send stdout and stderr to the rule's log file when one is declared.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# The index files to be produced are named
#
#   prefix.N.bt2
#
# with N in [1-4]. Dropping the last two dot-separated fields of every output
# must leave one common prefix, which is what bowtie2-build is given.
#
prefixes = list({'.'.join(name.split('.')[:-2]) for name in snakemake.output})
assert len(prefixes) == 1, 'Multiple prefixes detected from "{0}"'.format(snakemake.output)
prefix = prefixes[0]

# `extra`, `prefix` and `log` are referenced by name in the command template.
shell(
    "bowtie2-build "
    "--threads {snakemake.threads} "
    "{extra} "
    "{snakemake.input} "
    "{prefix} "
    "{log}"
)
Example #24
0
__author__ = "Adrien Leger"
__copyright__ = "Copyright 2019, Adrien Leger"
__email__ = "*****@*****.**"
__license__ = "MIT"
__version__ = "0.0.3"

# Imports
from snakemake.shell import shell
import os

# Shortcuts
# Optional extra arguments, the reference to index and the output directory;
# the command template refers to these names.
opt = snakemake.params.get("opt", "")
ref = snakemake.input.ref
index_dir = snakemake.output.index_dir
# Make sure the index directory exists before salmon writes into it.
os.makedirs(index_dir, exist_ok=True)
# Redirect stdout and stderr to the rule's log. Unlike a hard-coded
# "&> {snakemake.log}", this expands to nothing when the rule declares no log,
# so the command stays valid in that case.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Run shell command
shell(
    "salmon index {opt} -p {snakemake.threads} -t {ref} -i {index_dir} {log}"
)
Example #25
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


import os
from snakemake.shell import shell


# First output path without its extension; passed to samtools sort via -T.
prefix = os.path.splitext(snakemake.output[0])[0]

# Sort the first input into the first output.
shell(
    "samtools sort {snakemake.params} -@ {snakemake.threads} -o {snakemake.output[0]} "
    "-T {prefix} {snakemake.input[0]}")
Example #26
0
"""Snakemake wrapper for trimming paired-end reads using cutadapt."""

__author__ = "Julian de Ruiter"
__copyright__ = "Copyright 2017, Julian de Ruiter"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Paired-end trimming needs exactly two input files.
n = len(snakemake.input)
assert n == 2, "Input must contain 2 (paired-end) elements."

# Only stderr goes to the log: stdout is redirected to the QC output in the
# command itself.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

shell("cutadapt"
      " {snakemake.params.adapters}"
      " {snakemake.params.others}"
      " -o {snakemake.output.fastq1}"
      " -p {snakemake.output.fastq2}"
      " -j {snakemake.threads}"
      " {snakemake.input}"
      " > {snakemake.output.qc} {log}")
Example #27
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell



# Load the fastqc module and run FastQC with --extract into params.outdir;
# stderr of the subshell goes to the rule's log.
shell("""
(module load fastqc; \
fastqc --extract -t {snakemake.threads} -o {snakemake.params.outdir} {snakemake.input} ) 2> {snakemake.log}""")


Example #28
0
# Imports
from snakemake.shell import shell
from pyfaidx import Fasta
from math import log2
import os

# Wrapper info
# Wrapper metadata, written as the first line of the rule's log (the ">"
# redirection overwrites any existing log content).
wrapper_name = "star_index"
wrapper_version = "0.0.4"
author = "Adrien Leger"
license = "MIT"
shell(
    "echo 'Wrapper {wrapper_name} v{wrapper_version} / {author} / Licence {license}' > {snakemake.log}"
)

# Shortcuts
opt = snakemake.params.get("opt", "")
ref = snakemake.input.ref
annotation = snakemake.input.annotation
# Absolute index directory with a trailing slash, created if missing.
index_dir = os.path.abspath(snakemake.output.index_dir) + "/"
os.makedirs(index_dir, exist_ok=True)

# Compute index base depending on genome length
# (total length = sum of the lengths of all sequences in the reference).
genome_len = 0
with Fasta(ref) as fa:
    for seq in fa:
        genome_len += len(seq)
# min(14, int(log2(genome_len) / 2) - 1); log2 raises ValueError for an empty
# reference (genome_len == 0).
# NOTE(review): presumably passed to the STAR command that follows — confirm.
indexNbases = min(14, int(log2(genome_len) / 2) - 1)

# Run shell command
shell("STAR {opt} \
Example #29
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Merge all inputs into the first output, passing the rule params through.
shell("samtools merge --threads {snakemake.threads} {snakemake.params} "
      "{snakemake.output[0]} {snakemake.input}")
Example #30
0
    cmds.append("--dir {output_dir:q}")

# Only the file name is passed to --output.
# NOTE(review): `cmds`, `html_file` and `answer2bool` are defined outside the
# visible lines — confirm their contracts there.
if html_file:
    html_file_name = os.path.basename(html_file)
    cmds.append("--output {html_file_name:q}")

# reports
reports = [
    "alignment_report",
    "dedup_report",
    "splitting_report",
    "mbias_report",
    "nucleotide_report",
]
skip_optional_reports = answer2bool(
    snakemake.params.get("skip_optional_reports", False))
for report_name in reports:
    path = snakemake.input.get(report_name, "")
    if path:
        # Publish the path under the report's own name so the "{name:q}"
        # placeholder appended below can be resolved when the command is
        # formatted ("{{" / "}}" produce literal braces here).
        locals()[report_name] = path
        cmds.append("--{0} {{{1}:q}}".format(report_name, report_name))
    elif skip_optional_reports:
        # Report not supplied: pass the literal 'none' for it.
        cmds.append("--{0} 'none'".format(report_name))

# log
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
cmds.append("{log}")

# run shell command:
shell(" ".join(cmds))
Example #31
0
# the veqtl-mapper swarming mechanism is really silly...
# we need to count the number of lines in the input file
with open(snakemake.input['pheno']) as f:
    n_features = sum(1 for line in f)
n_features -= 1  # drop header

# get the number of features we want to use per run
n_features_per_job = int(
    np.ceil(n_features / int(snakemake.wildcards['j_total'])))

# do we need the final job? -- could do this modulo
# total_n = features handled by all jobs except the last one.
total_n = n_features_per_job * (int(snakemake.wildcards['j_total']) - 1)
final_job = snakemake.wildcards['j_cur'] == snakemake.wildcards['j_total']
if total_n >= n_features and final_job:
    # we don't need the final job. example: 608 gene and 30 jobs
    # Just create the expected output file.
    shell('touch %s' % snakemake.output[0])
else:
    # start to build the command
    # NOTE(review): the visible code only assembles `cmd`; presumably it is
    # executed further down, outside this view — confirm.
    cmd = 'veqtl-mapper --vcf %s' % (snakemake.input['geno'])
    cmd = '%s --bed %s' % (cmd, snakemake.input['pheno'])
    cmd = '%s --genes %d' % (cmd, n_features_per_job)
    cmd = '%s --job-number %s' % (cmd, snakemake.wildcards['j_cur'])
    cmd = '%s --out %s' % (cmd, snakemake.output[0])

    params = dict(snakemake.params.items())

    window = params.get("window", "1000000")  # cis window default = 1Mb
    cmd = '%s --window %s' % (cmd, window)

    # Append free-form extra settings when provided.
    if 'other_settings' in params:
        cmd = '%s %s' % (cmd, snakemake.params['other_settings'])
Example #32
0
from snakemake.shell import shell
# Log redirection requested with stdout=False and append=True.
# NOTE(review): presumably only stderr is appended to the log, so the stdout
# message below would not be captured — confirm against get_log.
log = snakemake.get_log(stdout=False, append=True)
# Copy input to output, then emit one message on each stream.
shell('''
      cat {snakemake.input} > {snakemake.output}
      (>&2 echo "a stderr message") {log}
      (echo "a stdout message") {log}
      ''')
Example #33
0
"""Snakemake wrapper for Salmon Index."""

__author__ = "Tessa Pierce"
__copyright__ = "Copyright 2018, Tessa Pierce"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Shell redirection sending both stdout and stderr to the rule's log.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
# Optional extra arguments; empty when the rule defines no `extra` param.
extra = snakemake.params.get("extra", "")

# Build the index from the input transcripts (-t) into the output (-i).
shell("salmon index -t {snakemake.input} -i {snakemake.output} "
      " --threads {snakemake.threads} {extra} {log}")
Example #34
0
def plus_lncrna_fasta_postprocess(tmpfiles, outfile):
    """Decompress `tmpfiles` with ``gunzip -c`` and write the result to `outfile`.

    Both names are interpolated into the shell command as-is, so `outfile`
    ends up uncompressed and is overwritten if it exists.
    """
    shell('gunzip -c {tmpfiles} > {outfile}')
Example #35
0
import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

# Optional extra arguments, JVM options and log redirection; the command
# template refers to these names.
extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# One "--filter-name NAME --filter-expression 'EXPR'" pair per configured
# filter. Inside a single-quoted shell word a backslash is literal, so \'
# does not escape a quote; an embedded single quote has to be written as
# '\'' (close the quote, add an escaped quote, reopen the quote).
filters = [
    "--filter-name {} --filter-expression '{}'".format(
        name, expr.replace("'", "'\\''"))
    for name, expr in snakemake.params.filters.items()
]

# Intervals may be given as an input file or, failing that, as a param.
intervals = snakemake.input.get("intervals", "")
if not intervals:
    intervals = snakemake.params.get("intervals", "")
if intervals:
    intervals = "--intervals {}".format(intervals)

# GATK gets a private temporary directory that is removed afterwards.
with tempfile.TemporaryDirectory() as tmpdir:
    shell("gatk --java-options '{java_opts}' VariantFiltration"
          " --variant {snakemake.input.vcf}"
          " --reference {snakemake.input.ref}"
          " {filters}"
          " {intervals}"
          " {extra}"
          " --tmp-dir {tmpdir}"
          " --output {snakemake.output.vcf}"
          " {log}")
Example #36
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell



# Four GATK steps run back to back with a fixed 48 GB heap, using
# /lscratch/$SLURM_JOBID for temporary and intermediate files:
# RealignerTargetCreator -> IndelRealigner -> BaseRecalibrator -> PrintReads.
# The first step overwrites the log (">"), the later ones append (">>").
# Doubled braces (${{VAR}}) are format escapes; the shell sees ${VAR}.
shell("""
#######################
MEM="48"
module load GATK
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T RealignerTargetCreator -R {snakemake.input.ref} -nt ${{SLURM_CPUS_ON_NODE}} -known {snakemake.input.phase1} -known {snakemake.input.mills} \
    -I {snakemake.input.bam} -o /lscratch/${{SLURM_JOBID}}/realignment.intervals > {snakemake.log} 2>&1
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T IndelRealigner -R {snakemake.input.ref} -known {snakemake.input.phase1} -known {snakemake.input.mills} \
    -I {snakemake.input.bam} --targetIntervals /lscratch/${{SLURM_JOBID}}/realignment.intervals \
    -o /lscratch/${{SLURM_JOBID}}/lr.bam >>{snakemake.log} 2>&1
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T BaseRecalibrator -R {snakemake.input.ref} -knownSites {snakemake.input.phase1} -knownSites {snakemake.input.mills} \
    -I /lscratch/${{SLURM_JOBID}}/lr.bam -nct ${{SLURM_CPUS_ON_NODE}} \
    -o /lscratch/${{SLURM_JOBID}}/recalibration.matrix.txt >>{snakemake.log} 2>&1
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T PrintReads -R {snakemake.input.ref} -I /lscratch/${{SLURM_JOBID}}/lr.bam \
    -nct ${{SLURM_CPUS_ON_NODE}} \
    -o {snakemake.output.bam} -BQSR /lscratch/${{SLURM_JOBID}}/recalibration.matrix.txt >>{snakemake.log} 2>&1
######################
""")
    counts = np.fromstring(inpf.read(), np.uint32).reshape((cycles, bins))
    return counts.astype(np.uint64)

def write_sig_dists(counts, filename):
    """Write the 2-D array `counts` to the gzip-compressed file `filename`.

    File layout: three little-endian unsigned 32-bit header fields (the
    constant 4, then ``counts.shape[0]`` and ``counts.shape[1]``), followed by
    the array converted to ``uint32`` and serialised in C (row-major) order.
    """
    with gzip.open(filename, 'wb') as outf:
        outf.write(struct.pack('<III', 4, counts.shape[0], counts.shape[1]))
        # tobytes() replaces ndarray.tostring(), a deprecated alias that
        # recent NumPy releases no longer provide; the bytes are identical.
        outf.write(counts.astype(np.uint32).tobytes())

# Running element-wise sum of the signal-distribution counts of all runs.
counts_aggr = None

# One tailseq-polya-ruler run per (signals, taginfo, out) triple. Every run
# writes its counts to the same path, output.sigdists, which is read back
# immediately and added to the running total.
for signals, taginfo, out in zip(input.signals, input.taginfo,
                                 output.taginfo):
    cmd = format('{BINDIR}/tailseq-polya-ruler {wildcards.tile} {signals} \
        {input.score_cutoffs} {CONF[polyA_finder][signal_analysis_trigger]} \
        {CONF[polyA_ruler][downhill_extension_weight]} \
        {taginfo} {CONF[polyA_seeder][dist_sampling_bins]} \
        {CONF[polyA_ruler][signal_resampling_gap]} \
        {output.sigdists} | {BGZIP_CMD} -c > {out}', wildcards=wildcards,
        input=input, output=output)
    shell(cmd)

    counts_new = load_sig_dists(output.sigdists)
    if counts_aggr is None:
        counts_aggr = counts_new
    else:
        counts_aggr += counts_new

# Replace the per-run file with the aggregated counts.
# NOTE(review): if the inputs are empty the loop never runs and counts_aggr
# is still None here — confirm that cannot happen.
write_sig_dists(counts_aggr, output.sigdists)

Example #38
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Load the igvtools module and run `igvtools count` on the input, writing to
# the output, for the genome given in params.genome.
shell("""
module load igvtools
igvtools count {snakemake.input} {snakemake.output} {snakemake.params.genome}
""")
Example #39
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# assumes that bams are coming in as a list

# Stream the BAM as SAM (with header), strip the tab-delimited BI:Z:<value>
# field with sed, and convert back to BAM; all stdout/stderr of the pipeline
# goes to the rule's log.
# NOTE(review): the two sed stages are identical — possibly one of them was
# meant to remove a different tag (e.g. BD:Z); confirm.
shell("""
module load samtools
( samtools view -h {snakemake.input} | \
  sed 's/\tBI\:Z\:[^\t]*//' | \
  sed 's/\tBI\:Z\:[^\t]*//' | samtools view -bS - > {snakemake.output} ) >& {snakemake.log}
""")
Example #40
0
# Exactly one read file, one FastQC HTML report and one FastQC zip are
# expected.
# NOTE(review): `n_reads`, `n_fastqc_html`, `n_fastqc_zip` and `log` are
# defined outside the visible lines — confirm there.
assert n_reads == 1, "Input must contain 1 fastq files. Given: %r." % [
    n_reads, snakemake.input.reads
]
assert n_fastqc_html == 1, "Input must contain 1 fastqc html reports. Given: %r." % n_fastqc_html
assert n_fastqc_zip == 1, "Input must contain 1 fastqc .zip files. Given: %r." % n_fastqc_zip

# Don't run with `--fastqc` flag
if "--fastqc" in snakemake.params.get("extra", ""):
    raise ValueError("The trim_galore Snakemake wrapper cannot "
                     "be run with the `--fastqc` flag. Please "
                     "remove the flag from extra params. "
                     "You can use the fastqc Snakemake wrapper on "
                     "the input and output files instead.")

# Check that two output files were supplied
m = len(snakemake.output)
assert m == 2, "Output must contain 2 files. Given: %r." % m

# Check that all output files are in the same directory
out_dir = os.path.dirname(snakemake.output[0])
for file_path in snakemake.output[1:]:
    assert out_dir == os.path.dirname(file_path), \
        "trim_galore can only output files to a single directory." \
        " Please indicate only one directory for the output files."

# trim_galore is told the output directory only (-o), not the file names.
shell("(trim_galore"
      " {snakemake.params.extra}"
      " -o {out_dir}"
      " {snakemake.input.reads})"
      " {log}")
Example #41
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# assumes that "extra" files are available. These should be included in the
# input specification for the rule

# the awk line replaces spaces in the info field (not allowed) with "_".
# This was necessary because Kaviar has spaces in the database names

# Annotate the VCF with vcfanno using $SLURM_CPUS_ON_NODE processes, then
# replace spaces in column 8 with "_" via awk. Doubled braces are format
# escapes ("{{" -> "{"), both for the shell variable and the awk program.
shell("""
module load vcfanno
vcfanno -p ${{SLURM_CPUS_ON_NODE}} {snakemake.input.config} {snakemake.input.vcf} \
  | awk -F'\t' -vOFS='\t' '{{ gsub(" ", "_", $8) ; print }}' > {snakemake.output}
""")
Example #42
0
######## Snakemake header ########
import sys
# Put one specific site-packages directory first on the import path.
# NOTE(review): this is a hard-coded, user-specific absolute path; the script
# only works on a machine where it exists.
sys.path.insert(
    0, "/home/athersh/miniconda3/envs/snakemake/lib/python3.5/site-packages")
import pickle
# Rebuild the `snakemake` job object from the embedded pickle payload.
# NOTE(review): pickle.loads() can execute arbitrary code while loading; this
# is only acceptable because the payload is embedded in the script itself —
# never feed it external data.
snakemake = pickle.loads(
    b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05(X\x19\x00\x00\x00dedup/sjl_kc_input_R1.bamq\x06X\x18\x00\x00\x00dedup/sjl_kc_shep_R1.bamq\x07e}q\x08(X\x03\x00\x00\x00inpq\th\x06X\x02\x00\x00\x00ipq\nh\x07X\x06\x00\x00\x00_namesq\x0b}q\x0c(h\tK\x00N\x86q\rh\nK\x01N\x86q\x0euubX\x06\x00\x00\x00outputq\x0fcsnakemake.io\nOutputFiles\nq\x10)\x81q\x11(X=\x00\x00\x00peak_out/macs2/sjl_kc_shep_R1/sjl_kc_shep_R1_peaks.narrowPeakq\x12X0\x00\x00\x00peak_out/macs2/sjl_kc_shep_R1/sjl_kc_shep_R1.bedq\x13e}q\x14(X\n\x00\x00\x00narrowPeakq\x15h\x12X\x03\x00\x00\x00bedq\x16h\x13h\x0b}q\x17(h\x15K\x00N\x86q\x18h\x16K\x01N\x86q\x19uubX\t\x00\x00\x00wildcardsq\x1acsnakemake.io\nWildcards\nq\x1b)\x81q\x1cX\x0e\x00\x00\x00sjl_kc_shep_R1q\x1da}q\x1e(X\x06\x00\x00\x00sampleq\x1fh\x1dh\x0b}q X\x06\x00\x00\x00sampleq!K\x00N\x86q"subX\x03\x00\x00\x00logq#csnakemake.io\nLog\nq$)\x81q%}q&h\x0b}q\'sbX\x07\x00\x00\x00threadsq(K\x01X\x04\x00\x00\x00ruleq)X\x05\x00\x00\x00macs2q*X\x06\x00\x00\x00paramsq+csnakemake.io\nParams\nq,)\x81q-X\x1d\x00\x00\x00peak_out/macs2/sjl_kc_shep_R1q.a}q/(X\x06\x00\x00\x00prefixq0h.h\x0b}q1h0K\x00N\x86q2subX\x06\x00\x00\x00configq3}q4X\t\x00\x00\x00resourcesq5csnakemake.io\nResources\nq6)\x81q7(K\x01K\x01e}q8(X\x06\x00\x00\x00_coresq9K\x01X\x06\x00\x00\x00_nodesq:K\x01h\x0b}q;(h9K\x00N\x86q<h:K\x01N\x86q=uubub.'
)
######## Original script #########
from snakemake.shell import shell

# Call peaks for the IP sample (-t) against the input control (-c), named
# after the sample wildcard and written to the directory in params.prefix.
shell('macs2 '
      'callpeak '
      '-c {snakemake.input.inp} '
      '-t {snakemake.input.ip} '
      '--bdg --SPMR '
      '-n {snakemake.wildcards.sample} '
      '--outdir {snakemake.params.prefix}')
# Run the R script "<params.prefix>_model.r".
# NOTE(review): macs2 is given -n <sample> and --outdir <prefix>, so its model
# script presumably lands at <prefix>/<sample>_model.r, not <prefix>_model.r —
# confirm this path.
shell('Rscript {snakemake.params.prefix}_model.r')
Example #43
0
__author__ = "Behram Radmanesh"
__copyright__ = "Copyright 2016, Behram Radmanesh"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra arguments; empty when the rule defines no `extra` param.
# NOTE(review): `extra` is not referenced by the command below.
try:
    extra = snakemake.params.extra
except AttributeError:
    extra = ""

# Take column 3 of the SAM file, keep values starting with "ch", count the
# distinct values, split them on "_", ":", space or "-" into tab-separated
# fields and print fields 2, 3, 4 followed by the count. Doubled braces are
# format escapes for the awk programs.
shell("""
awk '{{print $3}}' {snakemake.input.sam} \
        | grep "^ch" | sort | uniq -c | sed -e 's/_\|:\| \|-/\\t/g' \
        |  awk '{{OFS="\t";print $2,$3,$4,$1}}' > \
        {snakemake.output}"""
)
Example #44
0
__copyright__ = "Copyright 2018, Patrik Smeds"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# Run the commands with bash.
shell.executable("bash")

# Only stderr is sent to the rule's log.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

extra_params = snakemake.params.get("extra", "")

bam_input = snakemake.input[0]

# NOTE(review): `bam_input` is a single element of snakemake.input, so it is
# presumably always a str; in that case this check can never fire, even with
# several inputs. Possibly `len(snakemake.input) != 1` alone was intended —
# confirm before changing, since rules may currently pass extra inputs.
if not isinstance(bam_input, str) and len(snakemake.input) != 1:
    raise ValueError("Input bam should be one bam file: " + str(bam_input) + "!")

output_file = snakemake.output[0]

# NOTE(review): same concern as for the input check above.
if not isinstance(output_file, str) and len(snakemake.output) != 1:
    raise ValueError("Output should be one bam file: " + str(output_file) + "!")

shell(
    "fgbio SetMateInformation"
    " -i {bam_input}"
    " -o {output_file}"
    " {extra_params}"
    " {log}"
)
Example #45
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Assumes that SDST is installed:
# "easy_install git+https://github.com/seandavi/SDST.git"

# input: vcf
# input: bam the RNA-seq bam file
# output: vcf

# Run `seqtool vcf rnacount` on the input VCF (-f) and the RNA-seq BAM,
# writing the output VCF (-o); stderr goes to the rule's log.
shell("""
seqtool vcf rnacount -f {snakemake.input.vcf} -o {snakemake.output.vcf} {snakemake.input.bam} 2> {snakemake.log}
""")
Example #46
0
__author__ = "Ali Ghaffaari"
__copyright__ = "Copyright 2018, Ali Ghaffaari"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# Log redirection with log_fmt_shell's default stream selection.
log = snakemake.log_fmt_shell()

# Simulate reads from the reference into the two read outputs; the subshell
# makes the redirection in `log` cover the whole command.
shell(
    "(wgsim {snakemake.params} {snakemake.input.ref}"
    " {snakemake.output.read1} {snakemake.output.read2}) {log}"
)
Example #47
0
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Redirect stderr only when the rule declares a log file; an empty log would
# otherwise leave a dangling "2>" at the end of the picard command.
log = "2> {}".format(snakemake.log) if snakemake.log else ""

# Compute hybrid-selection metrics with picard CalculateHsMetrics.
#
# Steps performed by the script below:
#   1. Build picard-style interval lists next to the input BAM by prepending
#      the BAM header (samtools view -H) to the bait/target interval files,
#      reformatted by gawk (start shifted by one, "+" strand, a generated
#      "interval_<n>" name).
#   2. Run CalculateHsMetrics with a heap size derived from
#      SLURM_MEM_PER_NODE.
#   3. Remove the temporary interval lists.
#
# Doubled braces are literal braces for gawk/bash; single braces are filled in
# by snakemake's shell(). "\t" is expanded by Python to a real tab inside the
# gawk string literals, and a trailing backslash is a Python line continuation
# inside the literal.
shell("""
    module load samtools/1.3 picard/1.139

    ## Generate intervals for hsmetrics
    cat <(samtools view -H {snakemake.input} ) <(gawk '{{print $1 "\t" $2+1 "\t" $3 "\t+\tinterval_" NR}}' {snakemake.params.BAIT_INTERVALS} )> {snakemake.input}.BAIT_INTERVALS
    cat <(samtools view -H {snakemake.input} ) <(gawk '{{print $1 "\t" $2+1 "\t" $3 "\t+\tinterval_" NR}}' {snakemake.params.TARGET_INTERVALS} )> {snakemake.input}.TARGET_INTERVALS
    
    ## CalculateHsMetrics
    MEM=$((SLURM_MEM_PER_NODE / 1024))
    #MEM="22"
    java -Xmx${{MEM}}g -jar $PICARDJARPATH/picard.jar \
    CalculateHsMetrics \
    BAIT_INTERVALS={snakemake.input}.BAIT_INTERVALS \
    TARGET_INTERVALS={snakemake.input}.TARGET_INTERVALS \
    I={snakemake.input} \
    O={snakemake.output} \
    AS=true \
    VALIDATION_STRINGENCY=SILENT \
    {log}
    
    rm -f {snakemake.input}.BAIT_INTERVALS {snakemake.input}.TARGET_INTERVALS
""")

Example #48
0
__author__ = "Per Unneberg"
__copyright__ = "Copyright 2020, Per Unneberg"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# Optional extra arguments for genecovr; empty when params.options is unset.
options = snakemake.params.get("options", "")

# Redirection fragment sending both stdout and stderr to the rule's log.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Results are written below results/genecovr/<analysis>, where <analysis> is
# the rule's "analysis" wildcard.
analysis = snakemake.wildcards.analysis
outdir = os.path.join("results", "genecovr", analysis)

# Use the defaulted {options} rather than {snakemake.params.options}: the
# latter fails when the rule does not define the parameter, which would make
# the default above pointless.
shell("genecovr -p {snakemake.threads} "
      "{options} "
      "-d {outdir} "
      "{snakemake.input.csv} "
      "{log}")
Example #49
0
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2016, Sebastian Kurscheid"
__email__ = "*****@*****.**"
__license__ = "MIT"
__date__ = "2016-09-07"
__version__ = 0.1

from snakemake.shell import shell

# Convert the BAM given as input.bam_file to a pair of FASTQ files with
# "bedtools bamtofastq": output.read1 is passed to -fq and output.read2 to
# -fq2.
# Each trailing backslash is consumed by Python as a line continuation inside
# the string literal, so the shell receives a single command line.
shell("""
        bedtools bamtofastq -i {snakemake.input.bam_file} \
                            -fq {snakemake.output.read1} \
                            -fq2 {snakemake.output.read2}
      """)
Example #50
0
# Redirection fragment for the rule's log file: stderr only, stdout is left
# untouched (stdout=False, stderr=True).
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

def basename_without_ext(file_path):
    """Return the basename of ``file_path`` with its file extension removed.

    A trailing ``.gz`` is treated as part of a double extension, so both
    ``sample.fastq`` and ``sample.fastq.gz`` yield ``sample``. Dots elsewhere
    in the name are preserved (``a.b.fq.gz`` -> ``a.b``).

    At least the first dot-separated component is always kept, so a name with
    no extension (``reads``) or with only ``.gz`` (``sample.gz``) is reduced
    to its stem instead of collapsing to an empty string.

    Args:
        file_path: Path to the file; only its final component is used.

    Returns:
        The final path component without its extension.
    """
    base = path.basename(file_path)
    parts = base.split(".")

    # Two components (".<ext>.gz") for gzipped files, one otherwise, but never
    # more than the name actually has after its first component.
    n_suffixes = 2 if base.endswith(".gz") else 1
    n_suffixes = min(n_suffixes, len(parts) - 1)

    return ".".join(parts[: len(parts) - n_suffixes])


# Run fastqc in a private temporary directory: several jobs writing into the
# same fastqc output directory can race with each other.
with TemporaryDirectory() as tempdir:
    # All rule params are passed to fastqc verbatim; only the first input
    # file is analysed.
    shell("fastqc {snakemake.params} --quiet "
          "--outdir {tempdir} {snakemake.input[0]}"
          " {log}")

    # Move outputs into proper position. The report names are derived from
    # the input's basename without its extension, plus the "_fastqc" suffix
    # and ".html" / ".zip".
    output_base = basename_without_ext(snakemake.input[0])
    html_path = path.join(tempdir, output_base + "_fastqc.html")
    zip_path = path.join(tempdir, output_base + "_fastqc.zip")

    # Skip the move when source and destination are already the same path.
    if snakemake.output.html != html_path:
        shell("mv {html_path} {snakemake.output.html}")

    if snakemake.output.zip != zip_path:
        shell("mv {zip_path} {snakemake.output.zip}")
Example #51
0
__author__ = "Ryan Dale"
__copyright__ = "Copyright 2016, Ryan Dale"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# Optional extra kallisto arguments; nothing is added when the rule does not
# define params.extra.
extra = getattr(snakemake.params, "extra", "")

# Send both stdout and stderr to the rule's log file when one is declared.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# NOTE(review): outdir is computed but not referenced by the command below,
# which passes {snakemake.output} to -o instead - confirm which is intended.
outdir = os.path.dirname(snakemake.output[0])

kallisto_cmd = (
    "kallisto quant --index {snakemake.input.index} "
    "-o {snakemake.output} --threads {snakemake.threads} "
    "{extra} {snakemake.input.fastq} {log} "
)
shell(kallisto_cmd)
Example #52
0
__author__ = "Ali Ghaffaari"
__copyright__ = "Copyright 2018, Ali Ghaffaari"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# Redirection fragment for the rule's log file. stdout is excluded
# (stdout=False) because the command itself redirects stdout into
# output.reads.
log = snakemake.log_fmt_shell(stdout=False)

# The vg call is parenthesised so the redirection carried by {log} applies to
# the sub-shell as a whole, after the inner "> output.reads" redirection.
vg_sim_cmd = (
    "(vg sim {snakemake.params} --xg-name {snakemake.input.xg} "
    "--threads {snakemake.threads} > {snakemake.output.reads}) {log}"
)
shell(vg_sim_cmd)
Example #53
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Redirect stderr only when the rule declares a log file; an empty log would
# otherwise leave a dangling "2>" at the end of the command.
log = "2> {}".format(snakemake.log) if snakemake.log else ""

# Mark duplicates with picard. The Java heap size is fixed through the MEM
# shell variable (in gigabytes); "${{MEM}}" is a literal "${MEM}" for bash once
# snakemake's shell() has processed the doubled braces.
shell("""
MEM="8"
        module load picard
java -Xmx${{MEM}}g -jar $PICARDJARPATH/picard.jar MarkDuplicates VALIDATION_STRINGENCY=SILENT I={snakemake.input} O={snakemake.output} AS=true {log}""")

Example #54
0
"""Snakemake wrapper for running samtools depth."""

__author__ = "Dayne L Filer"
__copyright__ = "Copyright 2020, Dayne L Filer"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra arguments for samtools depth; empty when not configured.
params = snakemake.params.get("extra", "")

# An optional BED file restricts the computation; it is handed to samtools
# through "-b". Without one, nothing is added to the command line.
bed_file = snakemake.input.get("bed", "")
bed = "-b " + bed_file if bed_file else ""

depth_cmd = (
    "samtools depth {params} {bed} -o {snakemake.output[0]} "
    "{snakemake.input.bams}"
)
shell(depth_cmd)
Example #55
0
# specify output: vcf=

# Annotate input.vcf with Ensembl VEP (offline, cached) and write output.vcf.
#
# The script copies the VEP cache and the assembly FASTA to node-local scratch
# (/lscratch/<job id>), points CACHE_DIR at that copy and then runs
# variant_effect_predictor.pl with the CADD, ExAC, CSN and Carol plugins.
#
# NOTE(review): {buffer_size} is not defined in the visible part of this
# script - it is presumably set earlier; confirm before relying on it.
#
# Doubled braces are literal braces for bash ("${{VAR}}" -> "${VAR}"); single
# braces are filled in by snakemake's shell(). A trailing backslash is a
# Python line continuation inside the literal.
#
# The SIFT flag is spelled with two ASCII hyphens ("--sift"); a typographic
# dash there is not recognised as an option prefix.
shell("""
echo {buffer_size}
VEP_VERSION="83"
VEP_ASSEMBLY="GRCh37"
module load VEP/${{VEP_VERSION}}
module load samtools
mkdir -p /lscratch/$SLURM_JOBID/homo_sapiens/${{VEP_VERSION}}_${{VEP_ASSEMBLY}}
cp -r /fdb/VEP/${{VEP_VERSION}}/cache/homo_sapiens/${{VEP_VERSION}}_${{VEP_ASSEMBLY}} /lscratch/${{SLURM_JOBID}}/homo_sapiens
cp -r /fdb/VEP/${{VEP_VERSION}}/cache/${{VEP_ASSEMBLY}}.fa* /lscratch/${{SLURM_JOBID}}/
export CACHE_DIR=/lscratch/${{SLURM_JOBID}}/
export CADD_DIR=/data/CCRBioinfo/public/CADD
export EXAC_DIR=/fdb/exac/release0.3
export CACHE_DIR=/lscratch/${{SLURM_JOB_ID}}
variant_effect_predictor.pl \
  -i {snakemake.input.vcf} --offline --cache   \
  --dir_cache $CACHE_DIR --fasta $CACHE_DIR/${{VEP_ASSEMBLY}}.fa  \
  --output {snakemake.output.vcf} --fork ${{SLURM_CPUS_ON_NODE}} \
  --sift s --polyphen s --vcf --pick   \
  --symbol --buffer_size {buffer_size} --biotype --hgvs --assembly ${{VEP_ASSEMBLY}} \
  --gene_phenotype --gmaf --check_existing \
  --pubmed  --force_overwrite   \
  --maf_1kg --maf_esp --regulatory --domains --numbers   \
  --uniprot --xref_refseq \
  --plugin CADD,$CADD_DIR/whole_genome_SNVs.tsv.gz   \
  --plugin ExAC,$EXAC_DIR/ExAC.r0.3.sites.vep.vcf.gz   \
  --plugin CSN,1   \
  --plugin Carol
""")

Example #56
0
__author__ = "Patrik Smeds"
__copyright__ = "Copyright 2021, Patrik Smeds"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell
from snakemake_wrapper_utils.bcftools import get_bcftools_opts

# Common bcftools options derived from the rule by the shared wrapper helper.
bcftools_opts = get_bcftools_opts(snakemake)

# Redirection fragment for the rule's log file (stderr only).
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# The command writes to output[0] only, so more than one declared output is
# rejected up front.
n_outputs = len(snakemake.output)
if n_outputs > 1:
    raise Exception("Only one output file expected, got: " + str(n_outputs))

# Filter expression/options and optional extra arguments; both default to "".
filter = snakemake.params.get("filter", "")
# NOTE(review): extra is read here but does not appear in the command below -
# confirm whether get_bcftools_opts() already accounts for it.
extra = snakemake.params.get("extra", "")

bcftools_cmd = (
    "bcftools filter {filter} {snakemake.input[0]} {bcftools_opts} "
    "-o {snakemake.output[0]} {log}"
)
shell(bcftools_cmd)
Example #57
0
__author__ = "Behram Radmanesh"
__copyright__ = "Copyright 2016, Behram Radmanesh"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra arguments for the R script; nothing is added when the rule
# does not define params.extra.
extra = getattr(snakemake.params, "extra", "")

# Send both stdout and stderr to the rule's log file when one is declared.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# Run the R script given as input.dupRScript on the BAM file
# (input.dupBAM), passing the annotation and run settings as key=value
# arguments. {extra} and {log} are appended so that the values prepared above
# actually take effect; both expand to nothing when unset.
shell(
    "Rscript {snakemake.input.dupRScript} "
    "{snakemake.input.dupBAM} "
    "gtf={snakemake.input.GTF} "
    "stranded={snakemake.params.stranded} "
    "paired={snakemake.params.paired} "
    "outfile={snakemake.output[0]} "
    "threads={snakemake.threads} "
    "{extra} "
    "{log}"
)
Example #58
0
__author__ = "William Rowell"
__copyright__ = "Copyright 2020, William Rowell"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra arguments for whatshap; empty when params.extra is unset.
extra = snakemake.params.get("extra", "")

# Redirection fragment sending both stdout and stderr to the rule's log.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Run "whatshap phase" for the chromosome named by the "chromosome" wildcard,
# using input.reference, input.vcf and input.phaseinput, and writing to the
# rule's output. The call is parenthesised so the redirection in {log} applies
# to the whole sub-shell. Each trailing backslash is consumed by Python as a
# line continuation inside the string literal.
shell("""
    (whatshap phase \
        {extra} \
        --chromosome {snakemake.wildcards.chromosome} \
        --output {snakemake.output} \
        --reference {snakemake.input.reference} \
        {snakemake.input.vcf} \
        {snakemake.input.phaseinput}) {log}
    """)
Example #59
0
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2016, Sebastian Kurscheid"
__email__ = "*****@*****.**"
__license__ = "MIT"
__date__ = "2016-08-08"
__version__ = 0.2

from snakemake.shell import shell

# Run "kallisto quant" on a read pair (input.read1, input.read2) against the
# index given as input.ki. The rule's output is used as kallisto's output
# directory, and the number of bootstrap samples comes from params.bootstraps.
# Each trailing backslash is consumed by Python as a line continuation inside
# the string literal, so the shell receives a single command line.
shell("""
            kallisto quant --index={snakemake.input.ki} \
                           --output-dir={snakemake.output} \
                           --threads={snakemake.threads} \
                           --bootstrap-samples={snakemake.params.bootstraps} \
                           {snakemake.input.read1} {snakemake.input.read2}
      """)
Example #60
0
######## Snakemake header ########
# Generated preamble: makes the snakemake package importable from a fixed
# site-packages directory and rebuilds the job's ``snakemake`` object.
import sys
# NOTE(review): machine-specific absolute path baked into the script.
sys.path.insert(
    0, "/home/athersh/miniconda3/envs/snakemake/lib/python3.5/site-packages")
import pickle
# The pickled payload is embedded in this file as a literal (it is not read
# from an external source). Its visible strings describe rule "macs2" with
# the "sample" wildcard, inputs "inp"/"ip", outputs "narrowPeak"/"bed" and
# params "prefix"/"wrapper_sample".
snakemake = pickle.loads(
    b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\t\x00\x00\x00resourcesq\x03csnakemake.io\nResources\nq\x04)\x81q\x05(K\x01K\x01e}q\x06(X\x06\x00\x00\x00_namesq\x07}q\x08(X\x06\x00\x00\x00_nodesq\tK\x00N\x86q\nX\x06\x00\x00\x00_coresq\x0bK\x01N\x86q\x0cuh\tK\x01h\x0bK\x01ubX\t\x00\x00\x00wildcardsq\rcsnakemake.io\nWildcards\nq\x0e)\x81q\x0fX\x10\x00\x00\x00sjl_cl8_cp190_R1q\x10a}q\x11(h\x07}q\x12X\x06\x00\x00\x00sampleq\x13K\x00N\x86q\x14sX\x06\x00\x00\x00sampleq\x15h\x10ubX\x06\x00\x00\x00paramsq\x16csnakemake.io\nParams\nq\x17)\x81q\x18(X\x1f\x00\x00\x00peak_out/macs2/sjl_cl8_cp190_R1q\x19h\x10e}q\x1a(h\x07}q\x1b(X\x06\x00\x00\x00prefixq\x1cK\x00N\x86q\x1dX\x0e\x00\x00\x00wrapper_sampleq\x1eK\x01N\x86q\x1fuh\x1ch\x19h\x1eh\x10ubX\x04\x00\x00\x00ruleq X\x05\x00\x00\x00macs2q!X\x07\x00\x00\x00threadsq"K\x01X\x05\x00\x00\x00inputq#csnakemake.io\nInputFiles\nq$)\x81q%(X\x1a\x00\x00\x00dedup/sjl_cl8_input_R1.bamq&X\x1a\x00\x00\x00dedup/sjl_cl8_cp190_R1.bamq\'e}q((h\x07}q)(X\x03\x00\x00\x00inpq*K\x00N\x86q+X\x02\x00\x00\x00ipq,K\x01N\x86q-uh*h&h,h\'ubX\x06\x00\x00\x00outputq.csnakemake.io\nOutputFiles\nq/)\x81q0(XA\x00\x00\x00peak_out/macs2/sjl_cl8_cp190_R1/sjl_cl8_cp190_R1_peaks.narrowPeakq1X4\x00\x00\x00peak_out/macs2/sjl_cl8_cp190_R1/sjl_cl8_cp190_R1.bedq2e}q3(h\x07}q4(X\n\x00\x00\x00narrowPeakq5K\x00N\x86q6X\x03\x00\x00\x00bedq7K\x01N\x86q8uh5h1h7h2ubX\x03\x00\x00\x00logq9csnakemake.io\nLog\nq:)\x81q;}q<h\x07}q=sbX\x06\x00\x00\x00configq>}q?ub.'
)
######## Original script #########
from snakemake.shell import shell

import os

# Call peaks with macs2: input.inp is the control (-c) and input.ip the
# treatment (-t). Results are named after the "sample" wildcard and written
# into the directory given by params.prefix.
shell('macs2 '
      'callpeak '
      '-c {snakemake.input.inp} '
      '-t {snakemake.input.ip} '
      '--bdg --SPMR '
      '-n {snakemake.wildcards.sample} '
      '--outdir {snakemake.params.prefix}')

# Run the <sample>_model.r script found in the prefix directory, capturing its
# stdout in <sample>_model.pdf.
shell(
    'Rscript {snakemake.params.prefix}/{snakemake.params.wrapper_sample}_model.r > {snakemake.params.prefix}/{snakemake.params.wrapper_sample}_model.pdf'
)

# Expose the narrowPeak file under the .bed output name via a symlink. A
# symlink target is resolved relative to the directory containing the link,
# not the working directory, so the target is expressed relative to the
# link's directory; otherwise workdir-relative paths yield a dangling link.
bed_dir = os.path.dirname(snakemake.output.bed) or os.curdir
link_target = os.path.relpath(snakemake.output.narrowPeak, bed_dir)
shell("ln -sf {link_target} {snakemake.output.bed}")