def main():
    """Command-line entry point for the bwa alignment script.

    Registers the bwa-specific options on the environment's argument
    parser, replaces the references in the context with the validated
    ones, merges paired files in the sequence and runs ``align_bwa``.
    """
    env = Environment(version=VERSION, doc=__doc__)
    env.set_filename_parser(BowtieFilenameParser)
    # let bwa do the multiprocessing
    argparser = env.argument_parser

    bwa_default = path_to_executable('bwa', '/usr/local/bwa*',
                                     environ='SOT_PATH_TO_BWA')
    argparser.add_argument('--path-to-bwa', nargs='?', default=bwa_default,
                           help='The path to the bwa executable')

    samtools_default = path_to_executable('samtools', '/usr/local/samtools*',
                                          environ='SOT_PATH_TO_SAMTOOLS')
    argparser.add_argument('--path-to-samtools', nargs='?',
                           default=samtools_default,
                           help='The path to the samtools executable')

    # fix aliases, should be --ref too
    reference_help = dedent('''\
        Reference genome to align against (should be a fasta file indexed
        by bwa). This flag may be called multiple times (which will cause
        each reference to be aligned to separately). If no references are
        specified, we'll look the for environment variable
        SOT_DEFAULT_REFERENCES, which should be given as a list, e.g.
        "foo foo2 foo3"''')
    argparser.add_argument('--reference', dest='references',
                           action='append', help=reference_help)

    passthru_help = ('A list of arguments to be passed through to bwa '
                     'Substitute + for - '
                     '(e.g., --passthru-args +m 4 50')
    argparser.add_argument('--passthru-args', nargs='*', help=passthru_help)

    context = env.get_context()
    env.update_context(dict(references=validate_references(**context)))
    # pair up the files before handing them to the action
    env._sequence = merge_pairs(env.get_sequence(**context))
    env.do_action(align_bwa)
def main():
    """Command-line entry point for the MACS2 peak-calling script.

    Registers the MACS2 options, sets the default target to ``peaks``,
    installs the BAM filename parser and the setup-file reader/writer,
    then runs ``run_macs``.
    """
    env = Environment(doc=__doc__, version=VERSION)
    argparser = env.argument_parser

    gsize_help = ('Optional user-specified genome size (DEFAULT: '
                  'script will try to auto-detect the genome)')
    argparser.add_argument('-g', '--genome-size', dest='user_gsize',
                           default=None, help=gsize_help)

    macs_default = path_to_executable("macs2")
    argparser.add_argument('--path-to-macs', default=macs_default,
                           help="optional path to macs2 executable")

    argparser.add_argument('--no-subpeaks', dest='subpeaks',
                           action='store_false', default=True,
                           help='do not call subpeaks with --call-summits')

    argparser.add_argument('-q', '--q-value', dest='qvalue', default='0.01',
                           help='FDR/q-value cutoff (default is 0.01)')

    passthru_help = ('A list of arguments to be passed through to MACS2. '
                     'Substitute + for - (e.g., --passthru-args +m 4 50')
    argparser.add_argument('--passthru-args', nargs='*', help=passthru_help)

    argparser.set_defaults(target='peaks')

    env.set_filename_parser(BAMFilenameParser)
    env.set_config_reader(read_setup_file)
    env.set_config_writer(write_setup_file)
    env.do_action(run_macs)
def main():
    """Command-line entry point for the bowtie2 alignment script.

    Registers the bowtie2 options (including the counter-alignment
    group), replaces the references and counter-references in the context
    with the validated / concatenated ones, merges paired files in the
    sequence and runs ``align2``.
    """
    env = Environment(version=VERSION, doc=__doc__)
    env.set_filename_parser(BowtieFilenameParser)
    # let bowtie2 do the multiprocessing
    env.override_num_cpus(1)
    argparser = env.argument_parser

    bowtie2_default = path_to_executable('bowtie2', '/usr/local/bowtie2-*',
                                         environ='SOT_PATH_TO_BOWTIE2')
    argparser.add_argument('--path-to-bowtie2', nargs='?',
                           default=bowtie2_default,
                           help='The path to the bowtie2 executable')

    samtools_default = path_to_executable('samtools', '/usr/local/samtools*',
                                          environ='SOT_PATH_TO_SAMTOOLS')
    argparser.add_argument('--path-to-samtools', nargs='?',
                           default=samtools_default,
                           help='The path to the samtools executable')

    # fix aliases, should be --ref too
    reference_help = dedent('''\
        Reference genome to align against (either a bowtie2 index name or
        file, or a fasta file). This flag may be called multiple times
        (which will cause each reference to be aligned to separately). If
        no references are specified, we'll look the for environment
        variable SOT_DEFAULT_REFERENCES, which should be given as a list,
        e.g. "foo foo2 foo3"''')
    argparser.add_argument('--reference', dest='references',
                           action='append', help=reference_help)

    ignore_quality_help = dedent('''\
        Ignore quality scores if available. Also applies to
        counter-references if any are called''')
    argparser.add_argument('--ignore-quality', dest='use_quality',
                           action='store_false', help=ignore_quality_help)

    counter_group = argparser.add_argument_group(
        'counter-alignments',
        description=dedent('''\
            specify counter-reference genome(s)/sequence(s) to use for
            filtering out unwanted reads.'''))
    counter_reference_help = dedent('''\
        Optional counter-reference genome/sequences to align against
        (either a bowtie2 index name or file, or a fasta file). This flag
        may be called multiple times. All counter-references will be
        concatenated into one index, and reads will be aligned in --fast
        mode. Any reads which align will be saved in a separate directory
        called 'counteraligned' and not aligned against the reference
        genomes/sequences.

        If no counter-references are specified, we'll look the for
        environment variable SOT_DEFAULT_COUTNER_REFERENCES, which should
        be given as a list, e.g. "foo foo2 foo3"''')
    counter_group.add_argument('--counter-reference',
                               dest='counter_references', action='append',
                               help=counter_reference_help)

    passthru_help = ('A list of arguments to be passed through to bowtie2 '
                     '[alignment and counter-alignment]. Substitute + for - '
                     '(e.g., --passthru-args +m 4 50')
    argparser.add_argument('--passthru-args', nargs='*', help=passthru_help)

    context = env.get_context()
    validated = validate_references(**context)
    concatenated = cat_counter_references(**context)
    env.update_context(dict(references=validated,
                            counter_references=concatenated))
    # pair up the files before handing them to the action
    env._sequence = merge_pairs(env.get_sequence(**context))
    env.do_action(align2)
def main():
    """Command-line entry point for the bowtie alignment script.

    Registers the bowtie options (including the counter-alignment group),
    replaces the references and counter-references in the context with
    the validated / concatenated ones, and runs ``align``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    e.set_filename_parser(BowtieFilenameParser)
    # let bowtie do the multiprocessing
    e.override_num_cpus(1)
    parser = e.argument_parser
    parser.add_argument('--path-to-bowtie', nargs='?',
                        default=path_to_executable('bowtie',
                                                   '/usr/local/bowtie-*',
                                                   environ='SOT_PATH_TO_BOWTIE'),
                        help='The path to the bowtie executable')
    parser.add_argument('--path-to-samtools', nargs='?',
                        default=path_to_executable('samtools',
                                                   '/usr/local/samtools*',
                                                   environ='SOT_PATH_TO_SAMTOOLS'),
                        help='The path to the samtools executable')
    # fix aliases, should be --ref too
    parser.add_argument('--reference', dest='references', action='append',
                        help=dedent('''\
        Reference genome to align against (either a bowtie index name or
        file, or a fasta file). This flag may be called multiple times
        (which will cause each reference to be aligned to separately). If
        no references are specified, we'll look the for environment
        variable SOT_DEFAULT_REFERENCES, which should be given as a list,
        e.g. "foo foo2 foo3"'''),
                        )
    parser.add_argument('--no-unique', dest='unique', action='store_false',
                        help='do not produce unique/ alignment folder')
    parser.add_argument('--no-random', dest='random', action='store_false',
                        help='do not produce random/ alignment folder')
    parser.add_argument('--ignore-quality', dest='use_quality',
                        action='store_false',
                        help=dedent('''\
        Use -v mode with bowtie, allows only n mismatches total.

        Also applies to counter-references if any are called'''))
    parser.add_argument('--mismatches', default='2',
                        help=dedent('''\
        allow n mismatches, in the seed (default) or total if
        --ignore-quality (-v mode)'''))
    # The default must be one of `choices`: argparse does not validate
    # defaults, so a misspelt value would be passed on unchecked.
    parser.add_argument('--quals-type', default='solexa1.3',
                        choices=['solexa', 'solexa1.3', 'phred64', 'phred33',
                                 'integer'],
                        help='Valid options are integer, solexa1.3, solexa, '
                             'phred33, or phred64 (see bowtie for more info)')
    parser.add_argument('--max-quality', default='70',
                        help=dedent('''\
        specify maximum quality scores of all mismatched positions
        (default is 70), ignored in --ignore-quality (-v) mode'''))
    parser.add_argument('--seed-length', dest='seed_len', default='28',
                        help='use seed length of m (default is 28)')
    cparser = parser.add_argument_group('counter-alignments',
                                        description=dedent('''\
        specify counter-reference genome(s)/sequence(s) to use for
        filtering out unwanted reads.'''))
    cparser.add_argument('--counter-reference', dest='counter_references',
                         action='append',
                         help=dedent('''\
        Optional counter-reference genome/sequences to align against
        (either a bowtie index name or file, or a fasta file). This flag
        may be called multiple times. All counter-references will be
        concatenated into one index, and reads will be aligned in
        --no-unique (-M 1) mode. Any reads which align will be saved in a
        separate directory called 'bad_reads' and not aligned against the
        reference genomes/sequences. If no counter-references are
        specified, we'll look the for environment variable
        SOT_DEFAULT_COUTNER_REFERENCES, which should be given as a list,
        e.g. "foo foo2 foo3"'''),
                         )
    cparser.add_argument('--counter-mismatches', default=None,
                         help=dedent('''\
        allow n mismatches to counter-reference(s), in the seed (default)
        or total if --ignore-quality (-v mode).

        Default: same as references'''))
    cparser.add_argument('--counter-max-quality', default='70',
                         help=dedent('''\
        specify maximum quality scores of all mismatched positions when
        aligning to counter-reference(s) (default is 70), ignored in
        --ignore-quality (-v) mode'''))
    context = e.get_context()
    new_references = validate_references(**context)
    new_counter_references = cat_counter_references(**context)
    e.update_context({'references': new_references,
                      'counter_references': new_counter_references})
    e.do_action(align)
def run_macs(f, subpeaks=True, path_to_macs=None, logging_level=10,
             user_gsize=None, qvalue=0.01, passthru_args=None, **kwargs):
    """Run MACS2 ``callpeak`` on a BAM file.

    Parameters:
        f: parsed-filename object; its ``input_file``, ``control_file``,
            ``sample_name`` and ``output_dir`` attributes are read.
        subpeaks: when true, ``--call-summits`` is added to the command.
        path_to_macs: macs2 executable; looked up with
            ``path_to_executable("macs2")`` when None.
        logging_level: level handed to ``get_logger``.
        user_gsize: value for ``-g``. When falsy, the genome is guessed
            from the BAM file and the size is derived from it.
        qvalue: value for ``-q``.
        passthru_args: optional extra arguments for MACS2; every ``+`` in
            them is replaced by ``-``. The caller's list is not modified.
        **kwargs: accepted and ignored.

    Returns:
        The launched command line followed by two newlines.

    Raises:
        Usage: if no genome size was given and the genome of the input
            file cannot be determined.
    """
    logger = get_logger(logging_level)
    if path_to_macs is None:
        path_to_macs = path_to_executable("macs2")

    input_file = f.input_file
    control_file = f.control_file
    logger.debug('Processing %s', input_file)
    if control_file is not None:
        logger.debug('with control %s', control_file)

    # determine genome name and size
    if user_gsize:
        # An explicit size makes the genome build irrelevant, so the
        # guess (whose result was never used) is skipped.
        genome_size = user_gsize
    else:
        try:
            genome_build = guess_bam_genome(input_file)
        except NoMatchFoundError:
            raise Usage('Could not determine genome / genome size for '
                        'file %s' % input_file)
        gname = ''.join(ch for ch in genome_build if ch.isalpha())
        if gname == 'hg':
            genome_size = 'hs'
        elif gname in ('mm', 'ce', 'dm'):
            genome_size = gname
        else:
            # .values() works on both Python 2 and Python 3 mappings
            genome_size = '%.1e' % sum(genome(genome_build).values())

    fmt = decide_format(input_file, control_file, logger)
    name = f.sample_name.replace(' ', '_')

    if passthru_args is not None:
        # Build a new list rather than rewriting the caller's in place.
        passthru_args = [arg.replace('+', '-') for arg in passthru_args]
        logger.debug('Passing thru arguments %s', ' '.join(passthru_args))

    # Every element must be a string: the list is joined for logging and
    # handed to Popen, and the default qvalue is a float.
    macs_options = ['--trackline',
                    '-f', fmt,  # correct file format BAM or BAMPE
                    '-B', '--SPMR',  # bedgraphs, SPMR
                    '-g', str(genome_size),
                    '-q', str(qvalue),
                    '-n', name,  # run name
                    '-t', join(getcwd(), input_file)]  # treatment
    if control_file is not None:
        macs_options.extend(['-c', join(getcwd(), control_file)])
    if subpeaks:
        macs_options.append('--call-summits')
    if passthru_args is not None:
        macs_options.extend(passthru_args)

    step = [path_to_macs, 'callpeak'] + macs_options
    if platform.system() == 'Windows':
        # run the macs2 script through the current Python interpreter
        step.insert(0, sys.executable)

    macs_stdout = PolledPipe(logger=logger, level=WARN)
    macs_stderr = PolledPipe(logger=logger, level=ERROR)
    logger.debug('Launching %s', ' '.join(step))
    job = Popen(step,
                stdout=macs_stdout.w,
                stderr=macs_stderr.w,
                cwd=f.output_dir)
    pollables = [macs_stdout, macs_stderr]
    wait_for_job(job, pollables, logger)
    return '%s\n\n' % ' '.join(step)
from gzip import GzipFile
from bz2 import BZ2File
try:
    from scripter import path_to_executable, Usage
    from subprocess import Popen, PIPE
    try:
        PATH_TO_GZIP = path_to_executable('gzip')
    except Usage:
        # no gzip executable found; PATH_TO_GZIP stays undefined
        pass
except ImportError:
    # scripter is optional
    pass
from sys import stderr
from functools import partial


# slow for reading, fast for writing
def gzip_class_factory(path_to_gzip='gzip'):
    """Return ``gzip_open_func`` with *path_to_gzip* pre-bound.

    The result is a ``functools.partial`` that is called like
    ``gzip_open_func(filename, mode)``.
    """
    # Bind the caller's path; a literal 'gzip' here would ignore it.
    return partial(gzip_open_func, path_to_gzip=path_to_gzip)


class gzip_open_func(object):
    """gzip open func

    modes:
    (r) read using gzip.GzipFile
    (w) write using system gzip
    (P) PIPE from `gzip -d` (the original note is cut off after "for")
    """

    def __init__(self, filename, mode='r', path_to_gzip='gzip'):
        self._filename = filename
        self._mode = mode
        self._path_to_gzip = path_to_gzip