예제 #1
0
def main():
    """Command-line entry point for the bwa alignment script.

    Creates an ``Environment``, registers the bwa/samtools path options,
    the ``--reference`` option and ``--passthru-args``, replaces the
    ``references`` entry of the context with the result of
    ``validate_references``, stores ``merge_pairs(sequence)`` back on the
    environment and finally runs ``align_bwa`` through ``e.do_action``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    e.set_filename_parser(BowtieFilenameParser)
    # let bwa do the multiprocessing
    # (the bowtie and bowtie2 entry points make this same call right after
    # the equivalent comment; it was missing here)
    e.override_num_cpus(1)
    parser = e.argument_parser
    parser.add_argument('--path-to-bwa', nargs='?',
                        default=path_to_executable('bwa', '/usr/local/bwa*',
                                                   environ='SOT_PATH_TO_BWA'),
                        help='The path to the bwa executable')
    parser.add_argument('--path-to-samtools', nargs='?',
                        default=path_to_executable('samtools',
                                                   '/usr/local/samtools*',
                                                   environ=
                                                   'SOT_PATH_TO_SAMTOOLS'),
                        help='The path to the samtools executable')
    # fix aliases, should be --ref too
    parser.add_argument('--reference', dest='references', action='append',
                        help=dedent('''\
    Reference genome to align against (should be a
    fasta file indexed by bwa). This flag may be called multiple times
    (which will cause each reference to be aligned to separately). If no
    references are specified, we'll look for the environment variable
    SOT_DEFAULT_REFERENCES, which should be given as a list,
    e.g. "foo foo2 foo3"'''),
                        )
    parser.add_argument('--passthru-args', nargs='*',
                        help='A list of arguments to be passed through to '
                             'bwa. Substitute + for - (e.g., '
                             '--passthru-args +m 4 50)')
    context = e.get_context()
    # swap the user-supplied references for the validated ones before the
    # action runs
    new_references = validate_references(**context)
    e.update_context({'references': new_references})
    sequence = e.get_sequence(**context)
    e._sequence = merge_pairs(sequence)
    e.do_action(align_bwa)
예제 #2
0
def main():
    """Command-line entry point for the MACS2 peak-calling script.

    Creates an ``Environment``, registers the MACS-specific options
    (genome size, path to macs2, sub-peak calling, q-value cutoff and
    pass-through arguments), sets the default ``target`` to ``'peaks'``,
    installs the BAM filename parser and the setup-file reader/writer, and
    runs ``run_macs`` through ``e.do_action``.
    """
    e = Environment(doc=__doc__, version=VERSION)
    parser = e.argument_parser
    parser.add_argument('-g', '--genome-size', dest='user_gsize', default=None,
                        help='Optional user-specified genome size (DEFAULT: '
                             'script will try to auto-detect the genome)')
    parser.add_argument('--path-to-macs',
                        default=path_to_executable("macs2"),
                        help="optional path to macs2 executable")
    parser.add_argument('--no-subpeaks', dest='subpeaks', action='store_false',
                        default=True,
                        help='do not call subpeaks with --call-summits')
    # kept as a string: the value is handed to the macs2 command line as-is
    parser.add_argument('-q', '--q-value', dest='qvalue', default='0.01',
                        help='FDR/q-value cutoff (default is 0.01)')
    parser.add_argument('--passthru-args', nargs='*',
                        help='A list of arguments to be passed through to '
                             'MACS2. Substitute + for - (e.g., '
                             '--passthru-args +m 4 50)')
    parser.set_defaults(target='peaks')
    e.set_filename_parser(BAMFilenameParser)
    e.set_config_reader(read_setup_file)
    e.set_config_writer(write_setup_file)
    e.do_action(run_macs)
예제 #3
0
def main():
    """Command-line entry point for the bowtie2 alignment script.

    Creates an ``Environment``, registers the bowtie2/samtools path options,
    the reference and counter-reference options, ``--ignore-quality`` and
    ``--passthru-args``. It then replaces the ``references`` and
    ``counter_references`` context entries with the results of
    ``validate_references`` and ``cat_counter_references``, stores
    ``merge_pairs(sequence)`` back on the environment and runs ``align2``
    through ``e.do_action``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    e.set_filename_parser(BowtieFilenameParser)
    # let bowtie2 do the multiprocessing
    e.override_num_cpus(1)
    parser = e.argument_parser
    parser.add_argument('--path-to-bowtie2', nargs='?',
                        default=path_to_executable('bowtie2',
                                                   '/usr/local/bowtie2-*',
                                                   environ=
                                                   'SOT_PATH_TO_BOWTIE2'),
                        help='The path to the bowtie2 executable')
    parser.add_argument('--path-to-samtools', nargs='?',
                        default=path_to_executable('samtools',
                                                   '/usr/local/samtools*',
                                                   environ=
                                                   'SOT_PATH_TO_SAMTOOLS'),
                        help='The path to the samtools executable')
    # fix aliases, should be --ref too
    parser.add_argument('--reference', dest='references', action='append',
                        help=dedent('''\
    Reference genome to align against (either a bowtie2 index name or file,
    or a fasta file). This flag may be called multiple times (which will
    cause each reference to be aligned to separately). If no references are
    specified, we'll look for the environment variable
    SOT_DEFAULT_REFERENCES, which should be given as a list,
    e.g. "foo foo2 foo3"'''),
                        )
    parser.add_argument('--ignore-quality', dest='use_quality',
                        action='store_false',
                        help=dedent('''\
    Ignore quality scores if available. Also applies to
    counter-references if any are called'''))
    cparser = parser.add_argument_group('counter-alignments',
                                        description=dedent('''\
    specify counter-reference genome(s)/sequence(s) to use for filtering out
    unwanted reads.'''))
    # NOTE(review): the help text names SOT_DEFAULT_COUTNER_REFERENCES
    # ("COUTNER"); left unchanged because the code that reads the variable
    # is not visible here -- confirm the intended spelling.
    cparser.add_argument('--counter-reference', dest='counter_references',
                         action='append',
                         help=dedent('''\
    Optional counter-reference genome/sequences to align against (either a
    bowtie2 index name or file, or a fasta file). This flag may be called
    multiple times. All counter-references will be concatenated into one
    index, and reads will
    be aligned in --fast mode. Any reads which align will be saved
    in a separate directory called 'counteraligned' and not aligned against the
    reference genomes/sequences. If no counter-references are specified, we'll
    look for the environment variable SOT_DEFAULT_COUTNER_REFERENCES,
    which should be given as a list, e.g. "foo foo2 foo3"'''),
                         )
    parser.add_argument('--passthru-args', nargs='*',
                        help='A list of arguments to be passed through to '
                             'bowtie2 [alignment and counter-alignment]. '
                             'Substitute + for - (e.g., --passthru-args '
                             '+m 4 50)')
    context = e.get_context()
    # swap the user-supplied (counter-)references for the processed ones
    # before the action runs
    new_references = validate_references(**context)
    new_counter_references = cat_counter_references(**context)
    e.update_context({'references': new_references,
                      'counter_references': new_counter_references})
    sequence = e.get_sequence(**context)
    e._sequence = merge_pairs(sequence)
    e.do_action(align2)
예제 #4
0
def main():
    """Command-line entry point for the bowtie alignment script.

    Creates an ``Environment``, registers the bowtie/samtools path options,
    the reference options, the alignment tuning options (mismatches,
    quality type, maximum quality, seed length) and the counter-alignment
    option group. It then replaces the ``references`` and
    ``counter_references`` context entries with the results of
    ``validate_references`` and ``cat_counter_references`` and runs
    ``align`` through ``e.do_action``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    e.set_filename_parser(BowtieFilenameParser)
    # let bowtie do the multiprocessing
    e.override_num_cpus(1)
    parser = e.argument_parser
    parser.add_argument('--path-to-bowtie', nargs='?',
                        default=path_to_executable('bowtie',
                                                   '/usr/local/bowtie-*',
                                                   environ=
                                                   'SOT_PATH_TO_BOWTIE'),
                        help='The path to the bowtie executable')
    parser.add_argument('--path-to-samtools', nargs='?',
                        default=path_to_executable('samtools',
                                                   '/usr/local/samtools*',
                                                   environ=
                                                   'SOT_PATH_TO_SAMTOOLS'),
                        help='The path to the samtools executable')
    # fix aliases, should be --ref too
    parser.add_argument('--reference', dest='references', action='append',
                        help=dedent('''\
    Reference genome to align against (either a bowtie index name or file, or a
    fasta file). This flag may be called multiple times (which will cause each
    reference to be aligned to separately). If no references are specified,
    we'll look for the environment variable SOT_DEFAULT_REFERENCES, which
    should be given as a list, e.g. "foo foo2 foo3"'''),
                        )
    parser.add_argument('--no-unique', dest='unique', action='store_false',
                        help='do not produce unique/ alignment folder')
    parser.add_argument('--no-random', dest='random', action='store_false',
                        help='do not produce random/ alignment folder')
    parser.add_argument('--ignore-quality', dest='use_quality',
                        action='store_false',
                        help=dedent('''\
    Use -v mode with bowtie, allows only n mismatches total. Also applies to
    counter-references if any are called'''))
    parser.add_argument('--mismatches', default='2',
                        help=dedent('''\
    allow n mismatches, in the seed (default) or total if
    --ignore-quality (-v mode)'''))
    # The default must be one of the choices: argparse does not validate
    # defaults against ``choices``, so a misspelled default is passed on
    # silently.
    parser.add_argument('--quals-type', default='solexa1.3',
                        choices=['solexa', 'solexa1.3', 'phred64', 'phred33',
                                 'integer'],
                        help='Valid options are integer, solexa1.3, solexa, '
                             'phred33, or phred64 (see bowtie for more info)')
    parser.add_argument('--max-quality', default='70',
                        help=dedent('''\
    specify maximum quality scores of all mismatched positions (default is 70),
    ignored in --ignore-quality (-v) mode'''))
    parser.add_argument('--seed-length', dest='seed_len', default='28',
                        help='use seed length of m (default is 28)')
    cparser = parser.add_argument_group('counter-alignments',
                                        description=dedent('''\
    specify counter-reference genome(s)/sequence(s) to use for filtering out
    unwanted reads.'''))
    # NOTE(review): the help text names SOT_DEFAULT_COUTNER_REFERENCES
    # ("COUTNER"); left unchanged because the code that reads the variable
    # is not visible here -- confirm the intended spelling.
    cparser.add_argument('--counter-reference', dest='counter_references',
                         action='append',
                         help=dedent('''\
    Optional counter-reference genome/sequences to align against (either a
    bowtie index name or file, or a fasta file). This flag may be called
    multiple times. All counter-references will be concatenated into one
    index, and reads will be aligned in --no-unique (-M 1) mode. Any reads
    which align will be saved
    in a separate directory called 'bad_reads' and not aligned against the
    reference genomes/sequences. If no counter-references are specified, we'll
    look for the environment variable SOT_DEFAULT_COUTNER_REFERENCES,
    which should be given as a list, e.g. "foo foo2 foo3"'''),
                         )
    cparser.add_argument('--counter-mismatches', default=None,
                         help=dedent('''\
    allow n mismatches to counter-reference(s), in the seed (default) or total
    if --ignore-quality (-v mode). Default: same as references'''))
    cparser.add_argument('--counter-max-quality', default='70',
                         help=dedent('''\
    specify maximum quality scores of all mismatched positions when aligning to
    counter-reference(s) (default is 70), ignored in --ignore-quality (-v)
    mode'''))
    context = e.get_context()
    # swap the user-supplied (counter-)references for the processed ones
    # before the action runs
    new_references = validate_references(**context)
    new_counter_references = cat_counter_references(**context)
    e.update_context({'references': new_references,
                      'counter_references': new_counter_references})
    e.do_action(align)
예제 #5
0
def run_macs(f, subpeaks=True, path_to_macs=None, logging_level=10,
             user_gsize=None, qvalue=0.01, passthru_args=None,
             **kwargs):
    """Run MACS2 ``callpeak`` on a BAM file.

    Parameters:
        f: object providing ``input_file``, ``control_file`` (may be
            ``None``), ``sample_name`` and ``output_dir`` attributes.
        subpeaks: when true, ``--call-summits`` is added to the command.
        path_to_macs: path of the macs2 executable; looked up with
            ``path_to_executable("macs2")`` when ``None``.
        logging_level: level handed to ``get_logger``.
        user_gsize: genome size to pass to ``-g``; when falsy the size is
            derived from the genome build guessed from the BAM file.
        qvalue: value passed to ``-q`` (number or string).
        passthru_args: extra macs2 arguments in which every ``+`` is
            replaced by ``-`` before use; the caller's list is not modified.
        **kwargs: accepted and ignored.

    Returns:
        The command line that was launched, followed by two newlines.

    Raises:
        Usage: if no genome size was given and the genome build cannot be
            guessed from the input file.
    """
    logger = get_logger(logging_level)
    if path_to_macs is None:
        path_to_macs = path_to_executable("macs2")

    input_file = f.input_file
    control_file = f.control_file
    logger.debug('Processing %s', input_file)
    if control_file is not None:
        logger.debug('with control %s', control_file)

    # determine genome name and size
    try:
        genome_build = guess_bam_genome(input_file)
    except NoMatchFoundError:
        genome_build = None

    if user_gsize:
        genome_size = user_gsize
    elif genome_build is None:
        raise Usage('Could not determine genome / genome size for file %s'
                    % input_file)
    else:
        # reduce the build name to its letters, e.g. a build such as
        # "hg19" becomes "hg"
        gname = ''.join([x for x in genome_build if x.isalpha()])
        if gname == 'hg':
            genome_size = 'hs'
        elif gname in ['mm', 'ce', 'dm']:
            genome_size = gname
        else:
            # .values() works on both Python 2 and 3 (.itervalues() is
            # Python 2 only)
            genome_size = '%.1e' % sum(genome(genome_build).values())

    fmt = decide_format(input_file, control_file, logger)
    name = f.sample_name.replace(' ', '_')
    if passthru_args is not None:
        # build a new list so the caller's list is left untouched
        passthru_args = [arg.replace('+', '-') for arg in passthru_args]
        logger.debug('Passing thru arguments %s', ' '.join(passthru_args))
    # every element must be a string: the list is joined for logging and
    # handed to Popen
    macs_options = ['--trackline',
                    '-f', fmt,  # correct file format BAM or BAMPE
                    '-B', '--SPMR',  # bedgraphs, SPMR
                    '-g', str(genome_size),
                    '-q', str(qvalue),
                    '-n', name,  # run name
                    '-t', join(getcwd(), input_file)]  # treatment
    if control_file is not None:
        macs_options.extend(['-c', join(getcwd(), control_file)])
    if subpeaks:
        macs_options.append('--call-summits')
    if passthru_args is not None:
        macs_options.extend(passthru_args)

    step = [path_to_macs, 'callpeak'] + macs_options
    if platform.system() == 'Windows':
        # launch the macs2 script through the running Python interpreter
        step.insert(0, sys.executable)

    macs_stdout = PolledPipe(logger=logger, level=WARN)
    macs_stderr = PolledPipe(logger=logger, level=ERROR)
    logger.debug('Launching %s', ' '.join(step))
    job = Popen(step, stdout=macs_stdout.w, stderr=macs_stderr.w,
                cwd=f.output_dir)

    pollables = [macs_stdout, macs_stderr]
    wait_for_job(job, pollables, logger)

    return '%s\n\n' % ' '.join(step)
예제 #6
0
from gzip import GzipFile
from bz2 import BZ2File
try:
    from scripter import path_to_executable, Usage
    from subprocess import Popen, PIPE
    try:
        PATH_TO_GZIP = path_to_executable('gzip')
    except Usage:
        pass
except ImportError:
    pass
from sys import stderr
from functools import partial


# slow for reading, fast for writing
def gzip_class_factory(path_to_gzip='gzip'):
    """Return ``gzip_open_func`` with its ``path_to_gzip`` argument pre-bound.

    Parameters:
        path_to_gzip: value bound to the ``path_to_gzip`` keyword of
            ``gzip_open_func`` (defaults to ``'gzip'``).

    Returns:
        A ``functools.partial`` that is called like ``gzip_open_func``
        (``filename``, ``mode``) with the given ``path_to_gzip`` as default.
    """
    # forward the caller's value; a hard-coded 'gzip' here would silently
    # ignore the parameter
    return partial(gzip_open_func, path_to_gzip=path_to_gzip)


class gzip_open_func(object):
    """gzip open func
    modes:
    (r) read using gzip.GzipFile
    (w) write using system gzip
    (P) PIPE from `gzip -d` for
    """
    def __init__(self, filename, mode='r', path_to_gzip='gzip'):
        self._filename = filename
        self._mode = mode
        self._path_to_gzip = path_to_gzip