def run_bowtie2(paired_end_mapping, genome, output_path, disable_parallel=False):
    """Queue one ``bowtie2 | samtools view`` shell pipeline per sample.

    For every ``output_prefix -> paired_ends`` entry a shell command is handed
    to a ``_script_helpers.ShellJobRunner``. The command aligns
    ``paired_ends[0]`` (``-1``) and ``paired_ends[1]`` (``-2``) against the
    configured genome index, redirects bowtie2's stderr to
    ``<output_path>/<output_prefix>.bt2.log`` and writes the output of
    ``samtools view -bS`` to ``<output_path>/<output_prefix>.bt2.bam``.

    :param paired_end_mapping: mapping of output prefix to a sequence of input
        filenames. Every filename is sanity-checked, but only the first two
        are passed to bowtie2.
    :param genome: key looked up in ``config['bowtie2_genomes']`` to obtain
        the value passed to bowtie2's ``-x`` option.
    :param output_path: directory prefix for the ``.bt2.log`` / ``.bt2.bam``
        outputs (joined with a literal ``/``).
    :param disable_parallel: when true the job runner is built with its
        defaults; otherwise it is built with ``delay_seconds=60``.
        NOTE(review): the scheduling semantics live in ``ShellJobRunner`` --
        confirm there.
    :raises AssertionError: if an input filename contains a space or ``;``.
    """
    bowtie2_logger = _logshim.getLogger('run_bowtie2')

    # Import the config file to get genome locations
    config = _script_helpers.get_config()

    if disable_parallel:
        shell_job_runner = _script_helpers.ShellJobRunner(bowtie2_logger)
    else:
        shell_job_runner = _script_helpers.ShellJobRunner(bowtie2_logger, delay_seconds=60)

    # bowtie2 options:
    # --end-to-end: this is the default, but let's explicitly specify it
    # --sensitive: again, the default (consider switching to --fast?)
    # --no-unal: Suppress unaligned reads from the output .sam
    # --no-discordant: These are paired-end reads. We expect them to be non-discordant.
    # --mm: mmap MAP_SHARED (other processes can use our genome, cool!)
    # --met-stderr: Write metrics to stderr
    # --time: output the time things took
    # -x: target genome
    #
    # The template is loop-invariant, so it is built once up front.
    command = ("bowtie2 --end-to-end --sensitive --no-unal --no-discordant --mm --met-stderr --time "
               "-x %s -1 %s -2 %s 2>%s | samtools view -bS - >%s")

    # .items() (rather than the Python-2-only .iteritems()) keeps this
    # function working on both Python 2 and Python 3.
    for output_prefix, paired_ends in paired_end_mapping.items():
        bowtie2_logger.info('Spawning niced process for bowtie2 on: %s' % (output_prefix))

        for filename in paired_ends:
            # Vague sanity testing for input filenames: they are interpolated
            # into a shell command line below. Raised explicitly instead of
            # via `assert` so the check is not stripped under `python -O`;
            # the AssertionError type is kept for backward compatibility.
            if " " in filename or ";" in filename:
                raise AssertionError(
                    'Unsafe character (space or ";") in input filename: %r' % (filename,))
            bowtie2_logger.debug(' Input: %s' % (filename))

        shell_job_runner.run(command % (config['bowtie2_genomes'][genome],
                                        paired_ends[0],
                                        paired_ends[1],
                                        output_path + "/" + output_prefix + ".bt2.log",
                                        output_path + "/" + output_prefix + ".bt2.bam"))

    # Called once after every job has been handed to the runner.
    shell_job_runner.finish()
def run_bowtie2(paired_end_mapping, genome, output_path, disable_parallel=False):
    """Queue one ``bowtie2 | samtools view`` shell pipeline per sample.

    For every ``output_prefix -> paired_ends`` entry a shell command is handed
    to a ``_script_helpers.ShellJobRunner``. The command aligns
    ``paired_ends[0]`` (``-1``) and ``paired_ends[1]`` (``-2``) against the
    configured genome index, redirects bowtie2's stderr to
    ``<output_path>/<output_prefix>.bt2.log`` and writes the output of
    ``samtools view -bS`` to ``<output_path>/<output_prefix>.bt2.bam``.

    :param paired_end_mapping: mapping of output prefix to a sequence of input
        filenames. Every filename is sanity-checked, but only the first two
        are passed to bowtie2.
    :param genome: key looked up in ``config['bowtie2_genomes']`` to obtain
        the value passed to bowtie2's ``-x`` option.
    :param output_path: directory prefix for the ``.bt2.log`` / ``.bt2.bam``
        outputs (joined with a literal ``/``).
    :param disable_parallel: when true the job runner is built with its
        defaults; otherwise it is built with ``delay_seconds=60``.
        NOTE(review): the scheduling semantics live in ``ShellJobRunner`` --
        confirm there.
    :raises AssertionError: if an input filename contains a space or ``;``.
    """
    bowtie2_logger = _logshim.getLogger('run_bowtie2')

    # Import the config file to get genome locations
    config = _script_helpers.get_config()

    if disable_parallel:
        shell_job_runner = _script_helpers.ShellJobRunner(bowtie2_logger)
    else:
        shell_job_runner = _script_helpers.ShellJobRunner(bowtie2_logger, delay_seconds=60)

    # bowtie2 options:
    # --end-to-end: this is the default, but let's explicitly specify it
    # --sensitive: again, the default (consider switching to --fast?)
    # --no-unal: Suppress unaligned reads from the output .sam
    # --no-discordant: These are paired-end reads. We expect them to be non-discordant.
    # --mm: mmap MAP_SHARED (other processes can use our genome, cool!)
    # --met-stderr: Write metrics to stderr
    # --time: output the time things took
    # -x: target genome
    #
    # The template is loop-invariant, so it is built once up front.
    command = ("bowtie2 --end-to-end --sensitive --no-unal --no-discordant --mm --met-stderr --time "
               "-x %s -1 %s -2 %s 2>%s | samtools view -bS - >%s")

    # .items() (rather than the Python-2-only .iteritems()) keeps this
    # function working on both Python 2 and Python 3.
    for output_prefix, paired_ends in paired_end_mapping.items():
        bowtie2_logger.info('Spawning niced process for bowtie2 on: %s' % (output_prefix))

        for filename in paired_ends:
            # Vague sanity testing for input filenames: they are interpolated
            # into a shell command line below. Raised explicitly instead of
            # via `assert` so the check is not stripped under `python -O`;
            # the AssertionError type is kept for backward compatibility.
            if " " in filename or ";" in filename:
                raise AssertionError(
                    'Unsafe character (space or ";") in input filename: %r' % (filename,))
            bowtie2_logger.debug(' Input: %s' % (filename))

        shell_job_runner.run(command % (config['bowtie2_genomes'][genome],
                                        paired_ends[0],
                                        paired_ends[1],
                                        output_path + "/" + output_prefix + ".bt2.log",
                                        output_path + "/" + output_prefix + ".bt2.bam"))

    # Called once after every job has been handed to the runner.
    shell_job_runner.finish()
__copyright__ = 'Gordon Lab at Washington University in St. Louis' __license__ = 'MIT' __version__ = '1.0.3' import _logshim import _script_helpers import argparse import glob import os import tempfile # A parameter needed by samtools to sort in-memory. MAX_MEM = "50G" # Load our config files CONFIG = _script_helpers.get_config() def large_filter_fixmate_and_sort(input_files, genome, output_path, disable_parallel=False): primary_logger = _logshim.getLogger('first_pass') output_suffix = ".tmp" if disable_parallel: # Doesn't change parallelism in last samtools sort shell_job_runner = _script_helpers.ShellJobRunner(primary_logger) else: shell_job_runner = _script_helpers.ShellJobRunner(primary_logger, delay_seconds=60) # We do a few things here: # - View only mapping quality >= 10 # - Remove chrM