Esempio n. 1
0
def main():
    """Command-line entry point for the bwa alignment step.

    Creates the ``Environment``, registers the bwa-specific command-line
    options on its argument parser, replaces the ``references`` entry of
    the context with the result of ``validate_references``, passes the
    environment's sequence through ``merge_pairs`` and finally runs
    ``align_bwa`` via ``Environment.do_action``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    e.set_filename_parser(BowtieFilenameParser)
    # let bwa do the multiprocessing: pin the environment itself to a single
    # CPU so it does not also fan out on top of bwa's own parallelism
    e.override_num_cpus(1)
    parser = e.argument_parser
    parser.add_argument('--path-to-bwa', nargs='?',
                        default=path_to_executable('bwa', '/usr/local/bwa*',
                                                   environ='SOT_PATH_TO_BWA'),
                        help='The path to the bwa executable')
    parser.add_argument('--path-to-samtools', nargs='?',
                        default=path_to_executable('samtools',
                                                   '/usr/local/samtools*',
                                                   environ=
                                                   'SOT_PATH_TO_SAMTOOLS'),
                        help='The path to the samtools executable')
    # fix aliases, should be --ref too
    parser.add_argument('--reference', dest='references', action='append',
                        help=dedent('''\
    Reference genome to align against (should be a
    fasta file indexed by bwa). This flag may be called multiple times
    (which will cause each reference to be aligned to separately). If no
    references are specified, we'll look for the environment variable
    SOT_DEFAULT_REFERENCES, which should be given as a list,
    e.g. "foo foo2 foo3"'''),
                        )
    parser.add_argument('--passthru-args', nargs='*',
                        help='A list of arguments to be passed through to '
                             'bwa. Substitute + for - '
                             '(e.g., --passthru-args +m 4 50)')
    context = e.get_context()
    # Swap the user-supplied references for their validated form before the
    # sequence is built.
    new_references = validate_references(**context)
    e.update_context({'references': new_references})
    sequence = e.get_sequence(**context)
    # NOTE(review): this writes the private ``_sequence`` attribute directly;
    # presumably Environment offers no public setter -- confirm.
    e._sequence = merge_pairs(sequence)
    e.do_action(align_bwa)
Esempio n. 2
0
def main():
    """Entry point: register the motif-analysis options and run ``action``.

    The filename parser is set to ``PeaksFilenameParser`` and the parser
    default for ``target`` is ``'analysis'``.
    """
    env = Environment(doc=__doc__, version=VERSION)
    env.set_filename_parser(PeaksFilenameParser)

    argparser = env.argument_parser
    add_option = argparser.add_argument

    add_option(
        '--bysummit',
        action='store_true',
        default=False,
        help=('Assume FASTA files are centered at the summit, rather than '
              'spanning start to end.'),
    )
    add_option(
        '--motif',
        required=True,
        dest='motif_file',
        help='Path to file containing motif',
    )
    add_option(
        '--motif-number',
        type=int,
        default=1,
        help='Motif number within file (e.g. 1, 2, 3) [Default is 1st]',
    )
    add_option(
        '--motif-type',
        default='MEME',
        help='motif type (see Bio.Motif for more info)',
    )
    add_option(
        '--genome',
        help='Reference genome (path to 2bit file)',
    )

    argparser.set_defaults(target='analysis')
    env.do_action(action)
def main():
    """Command-line entry point for the barcode splitter.

    Registers the clipping/length/barcode/linker options on the
    environment's argument parser, sets the parser default for ``target``
    to ``'processed'``, installs the barcode filename parser together with
    the config reader/writer, and runs ``splitter`` via
    ``Environment.do_action``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    parser = e.argument_parser
    parser.add_argument('--no-clipping', default=False, action='store_true',
                        help='Do not clip barcodes from reads when assigning '
                             'to barcode. OVERRIDES ALL OTHER '
                             'CLIPPING OPTIONS')
    parser.add_argument('--strip-after-barcode', default=1, type=int,
                        help="strip n bases after the barcode is removed "
                             "(5' end) (by default this is 1 now, and is "
                             "ignored if GERALD handled the barcoding)")
    parser.add_argument('--strip-before-barcode', default=0, type=int,
                        help="strip n bases before the barcode is removed "
                             "(5' end) (by default this is 0 now, and is "
                             "ignored if GERALD handled the barcoding)")
    # The help used to claim "(default: ignore)" although the default is 4.
    parser.add_argument('--min-length', type=int, default=4,
                        help='require sequences to be at least n total bases '
                             'of non-N sequence (default: 4)')
    # NOTE(review): -1 presumably means "do not truncate", matching the
    # "(default: ignore)" help text -- confirm against the splitter.
    parser.add_argument('--max-length', type=int, default=-1,
                        help='truncate final sequences to n bases (default: '
                             'ignore)')
    parser.add_argument('--no-gzip', default=False, action='store_true',
                        help='Do not gzip output files')
    bgroup = parser.add_argument_group('barcodes',
                                       'Specify sequence barcodes in the '
                                       'sample(s)')
    bgroup.add_argument('-b', '--barcodes', action='append', type=valid_seq,
                        default=[],
                        help="Specify a barcode sequence. May be invoked "
                             "multiple times")
    # Shares dest='barcodes' with -b: stores the fixed four-barcode list.
    bgroup.add_argument('--kry-barcodes', dest='barcodes',
                        action='store_const',
                        help='Alias for -bTCAT -bGACG -bAGTC -bCTGA',
                        const=['TCAT', 'GACG', 'AGTC', 'CTGA'])
    parser.add_argument('--linker', default='', type=valid_seq,
                        help="Specify a 3' adaptor/linker sequence that we "
                             "should clip off of each read")
    parser.set_defaults(target='processed')
    e.set_filename_parser(BarcodeFilenameParser)
    e.set_config_reader(read_config)
    e.set_config_writer(write_config)
    e.do_action(splitter)
Esempio n. 4
0
def main():
    """Entry point: register the MACS2 options and run ``run_macs``.

    The parser default for ``target`` is ``'peaks'``; the environment uses
    ``BAMFilenameParser`` plus the setup-file reader/writer.
    """
    env = Environment(doc=__doc__, version=VERSION)
    argparser = env.argument_parser
    add_option = argparser.add_argument

    add_option(
        '-g', '--genome-size',
        dest='user_gsize',
        default=None,
        help=('Optional user-specified genome size (DEFAULT: script will '
              'try to auto-detect the genome)'),
    )
    add_option(
        '--path-to-macs',
        default=path_to_executable("macs2"),
        help="optional path to macs2 executable",
    )
    add_option(
        '--no-subpeaks',
        dest='subpeaks',
        action='store_false',
        default=True,
        help='do not call subpeaks with --call-summits',
    )
    add_option(
        '-q', '--q-value',
        dest='qvalue',
        default='0.01',
        help='FDR/q-value cutoff (default is 0.01)',
    )
    add_option(
        '--passthru-args',
        nargs='*',
        help=('A list of arguments to be passed through to MACS2. '
              'Substitute + for - (e.g., --passthru-args +m 4 50'),
    )

    argparser.set_defaults(target='peaks')

    env.set_filename_parser(BAMFilenameParser)
    env.set_config_reader(read_setup_file)
    env.set_config_writer(write_setup_file)
    env.do_action(run_macs)
Esempio n. 5
0
def main():
    """Entry point: register the bowtie2 alignment options and run ``align2``.

    After the options are registered, the context's ``references`` and
    ``counter_references`` are replaced with the results of
    ``validate_references`` and ``cat_counter_references``, and the
    environment's sequence is passed through ``merge_pairs``.
    """
    env = Environment(version=VERSION, doc=__doc__)
    env.set_filename_parser(BowtieFilenameParser)
    # bowtie2 handles the multiprocessing itself
    env.override_num_cpus(1)
    argparser = env.argument_parser

    bowtie2_default = path_to_executable(
        'bowtie2', '/usr/local/bowtie2-*', environ='SOT_PATH_TO_BOWTIE2')
    argparser.add_argument(
        '--path-to-bowtie2',
        nargs='?',
        default=bowtie2_default,
        help='The path to the bowtie2 executable',
    )

    samtools_default = path_to_executable(
        'samtools', '/usr/local/samtools*', environ='SOT_PATH_TO_SAMTOOLS')
    argparser.add_argument(
        '--path-to-samtools',
        nargs='?',
        default=samtools_default,
        help='The path to the samtools executable',
    )

    # fix aliases, should be --ref too
    reference_help = dedent('''\
    Reference genome to align against (either a bowtie2 index name or file,
    or a fasta file). This flag may be called multiple times (which will
    cause each reference to be aligned to separately). If no references are
    specified, we'll look the for environment variable
    SOT_DEFAULT_REFERENCES, which should be given as a list,
    e.g. "foo foo2 foo3"''')
    argparser.add_argument(
        '--reference',
        dest='references',
        action='append',
        help=reference_help,
    )

    ignore_quality_help = dedent('''\
    Ignore quality scores if available. Also applies to
    counter-references if any are called''')
    argparser.add_argument(
        '--ignore-quality',
        dest='use_quality',
        action='store_false',
        help=ignore_quality_help,
    )

    counter_group_description = dedent('''\
    specify counter-reference genome(s)/sequence(s) to use for filtering out
    unwanted reads.''')
    counter_group = argparser.add_argument_group(
        'counter-alignments', description=counter_group_description)

    counter_reference_help = dedent('''\
    Optional counter-reference genome/sequences to align against (either a
    bowtie2 index name or file, or a fasta file). This flag may be called
    multiple times. All counter-references will be concatenated into one
    index, and reads will
    be aligned in --fast mode. Any reads which align will be saved
    in a separate directory called 'counteraligned' and not aligned against the
    reference genomes/sequences. If no counter-references are specified, we'll
    look the for environment variable SOT_DEFAULT_COUTNER_REFERENCES,
    which should be given as a list, e.g. "foo foo2 foo3"''')
    counter_group.add_argument(
        '--counter-reference',
        dest='counter_references',
        action='append',
        help=counter_reference_help,
    )

    argparser.add_argument(
        '--passthru-args',
        nargs='*',
        help=('A list of arguments to be passed through to bowtie2 '
              '[alignment and counter-alignment]. Substitute + for - '
              '(e.g., --passthru-args +m 4 50'),
    )

    context = env.get_context()
    validated_references = validate_references(**context)
    merged_counter_references = cat_counter_references(**context)
    env.update_context({
        'references': validated_references,
        'counter_references': merged_counter_references,
    })
    env._sequence = merge_pairs(env.get_sequence(**context))
    env.do_action(align2)
Esempio n. 6
0
def main():
    """Command-line entry point for the bowtie alignment step.

    Creates the ``Environment``, registers the bowtie alignment and
    counter-alignment options on its argument parser, replaces the
    ``references`` and ``counter_references`` entries of the context with
    the results of ``validate_references`` and ``cat_counter_references``,
    and runs ``align`` via ``Environment.do_action``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    e.set_filename_parser(BowtieFilenameParser)
    # let bowtie do the multiprocessing
    e.override_num_cpus(1)
    parser = e.argument_parser
    parser.add_argument('--path-to-bowtie', nargs='?',
                        default=path_to_executable('bowtie',
                                                   '/usr/local/bowtie-*',
                                                   environ=
                                                   'SOT_PATH_TO_BOWTIE'),
                        help='The path to the bowtie executable')
    parser.add_argument('--path-to-samtools', nargs='?',
                        default=path_to_executable('samtools',
                                                   '/usr/local/samtools*',
                                                   environ=
                                                   'SOT_PATH_TO_SAMTOOLS'),
                        help='The path to the samtools executable')
    # fix aliases, should be --ref too
    parser.add_argument('--reference', dest='references', action='append',
                        help=dedent('''\
    Reference genome to align against (either a bowtie index name or file, or a
    fasta file). This flag may be called multiple times (which will cause each
    reference to be aligned to separately). If no references are specified,
    we'll look for the environment variable SOT_DEFAULT_REFERENCES, which
    should be given as a list, e.g. "foo foo2 foo3"'''),
                        )
    parser.add_argument('--no-unique', dest='unique', action='store_false',
                        help='do not produce unique/ alignment folder')
    parser.add_argument('--no-random', dest='random', action='store_false',
                        help='do not produce random/ alignment folder')
    parser.add_argument('--ignore-quality', dest='use_quality',
                        action='store_false',
                        help=dedent('''\
    Use -v mode with bowtie, allows only n mismatches total. Also applies to
    counter-references if any are called'''))
    parser.add_argument('--mismatches', default='2',
                        help=dedent('''\
    allow n mismatches, in the seed (default) or total if
    --ignore-quality (-v mode)'''))
    # The default must be one of ``choices``: argparse does not validate
    # defaults against choices, so the former 'solex1.3' typo went through
    # unchecked whenever --quals-type was omitted.
    parser.add_argument('--quals-type', default='solexa1.3',
                        choices=['solexa', 'solexa1.3', 'phred64', 'phred33',
                                 'integer'],
                        help='Valid options are integer, solexa1.3, solexa, '
                             'phred33, or phred64 (see bowtie for more info)')
    parser.add_argument('--max-quality', default='70',
                        help=dedent('''\
    specify maximum quality scores of all mismatched positions (default is 70),
    ignored in --ignore-quality (-v) mode'''))
    parser.add_argument('--seed-length', dest='seed_len', default='28',
                        help='use seed length of m (default is 28)')
    cparser = parser.add_argument_group('counter-alignments',
                                        description=dedent('''\
    specify counter-reference genome(s)/sequence(s) to use for filtering out
    unwanted reads.'''))
    # NOTE(review): the help below spells the variable
    # SOT_DEFAULT_COUTNER_REFERENCES; left untouched because the name that is
    # actually read is defined outside this function -- confirm and align.
    cparser.add_argument('--counter-reference', dest='counter_references',
                         action='append',
                         help=dedent('''\
    Optional counter-reference genome/sequences to align against (either a
    bowtie index name or file, or a fasta file). This flag may be called
    multiple times. All counter-references will be concatenated into one
    index, and reads will be aligned in --no-unique (-M 1) mode. Any reads
    which align will be saved
    in a separate directory called 'bad_reads' and not aligned against the
    reference genomes/sequences. If no counter-references are specified, we'll
    look for the environment variable SOT_DEFAULT_COUTNER_REFERENCES,
    which should be given as a list, e.g. "foo foo2 foo3"'''),
                         )
    cparser.add_argument('--counter-mismatches', default=None,
                         help=dedent('''\
    allow n mismatches to counter-reference(s), in the seed (default) or total
    if --ignore-quality (-v mode). Default: same as references'''))
    cparser.add_argument('--counter-max-quality', default='70',
                         help=dedent('''\
    specify maximum quality scores of all mismatched positions when aligning to
    counter-reference(s) (default is 70), ignored in --ignore-quality (-v)
    mode'''))
    context = e.get_context()
    # Swap the user-supplied (counter-)references for their processed form
    # before the action runs.
    new_references = validate_references(**context)
    new_counter_references = cat_counter_references(**context)
    e.update_context({'references': new_references,
                      'counter_references': new_counter_references})
    e.do_action(align)