def main():
    """Register the barcode/trimming command-line options and run the actions.

    Creates an ``Environment``, adds the strip, length, gzip, barcode, linker
    and collapse options to its argument parser, sets the ``target`` default
    to ``'processed'``, installs the filename parser and the config
    reader/writer, and then runs ``splitter`` followed by ``collapser``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    parser = e.argument_parser
    parser.add_argument('--strip-after-barcode', default=1, type=int,
                        help="strip n bases after the barcode is removed "
                             "(5' end) (by default this is 1 now, and is "
                             "ignored if GERALD handled the barcoding)")
    parser.add_argument('--strip-before-barcode', default=0, type=int,
                        help="strip n bases before the barcode is removed "
                             "(5' end) (by default this is 0 now, and is "
                             "ignored if GERALD handled the barcoding)")
    # The help text previously claimed "(default: ignore)" although the
    # default is 4; it now states the value that is actually used.
    parser.add_argument('--min-length', type=int, default=4,
                        help='require sequences to be at least n total bases '
                             'of non-N sequence (default: 4)')
    parser.add_argument('--max-length', type=int, default=-1,
                        help='truncate final sequences to n bases '
                             '(default: ignore)')
    parser.add_argument('--no-gzip', default=False, action='store_true',
                        help='Do not gzip output files')

    bgroup = parser.add_argument_group(
        'barcodes', 'Specify sequence barcodes in the sample(s)')
    bgroup.add_argument('-b', '--barcodes', action='append', type=valid_seq,
                        help="Specify a barcode sequence. "
                             "May be invoked multiple times")
    # Shares dest='barcodes' with -b, so it stores a preset barcode list there.
    bgroup.add_argument('--kry-barcodes', dest='barcodes',
                        action='store_const',
                        help='Alias for -bTCAT -bGACG -bAGTC -bCTGA',
                        const=['TCAT', 'GACG', 'AGTC', 'CTGA'])

    parser.add_argument('--linker', default='', type=valid_seq,
                        help="Specify a 3' adaptor/linker sequence that we "
                             "should clip off of each read")
    parser.add_argument('--collapse', default=False, action='store_true',
                        help='Collapse identical reads')
    parser.set_defaults(**{'target': 'processed'})

    e.set_filename_parser(BarcodeFilenameParser)
    e.set_config_reader(read_config)
    e.set_config_writer(write_config)
    e.do_action(splitter)
    e.do_action(collapser)
def main():
    """Register the bwa alignment command-line options and run ``align_bwa``.

    Creates an ``Environment`` using ``BowtieFilenameParser``, adds the
    executable-path, reference and pass-through options, replaces the
    ``references`` context entry with the result of ``validate_references``,
    replaces the environment's sequence with ``merge_pairs`` of the current
    one, and finally runs ``align_bwa``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    e.set_filename_parser(BowtieFilenameParser)
    # let bwa do the multiprocessing
    # NOTE(review): no e.override_num_cpus(...) call accompanies this comment
    # here -- confirm that this is intended.
    parser = e.argument_parser
    parser.add_argument('--path-to-bwa', nargs='?',
                        default=path_to_executable(
                            'bwa', '/usr/local/bwa*',
                            environ='SOT_PATH_TO_BWA'),
                        help='The path to the bwa executable')
    parser.add_argument('--path-to-samtools', nargs='?',
                        default=path_to_executable(
                            'samtools', '/usr/local/samtools*',
                            environ='SOT_PATH_TO_SAMTOOLS'),
                        help='The path to the samtools executable')
    # fix aliases, should be --ref too
    parser.add_argument('--reference', dest='references', action='append',
                        help=dedent('''\
        Reference genome to align against (should be a fasta file indexed
        by bwa). This flag may be called multiple times (which will cause
        each reference to be aligned to separately). If no references are
        specified, we'll look for the environment variable
        SOT_DEFAULT_REFERENCES, which should be given as a list, e.g.
        "foo foo2 foo3"'''), )
    # Help text fixed: sentence break after "bwa" and the closing parenthesis
    # of the example were missing.
    parser.add_argument('--passthru-args', nargs='*',
                        help='A list of arguments to be passed through to '
                             'bwa. Substitute + for - (e.g., '
                             '--passthru-args +m 4 50)')

    context = e.get_context()
    new_references = validate_references(**context)
    e.update_context({'references': new_references})
    # The sequence is requested with the context captured before the update.
    sequence = e.get_sequence(**context)
    e._sequence = merge_pairs(sequence)
    e.do_action(align_bwa)
def main():
    """Register the motif command-line options and run ``action``.

    Creates an ``Environment`` using ``PeaksFilenameParser``, adds the
    summit, motif and genome options to its argument parser, sets the
    ``target`` default to ``'analysis'`` and runs ``action``.
    """
    env = Environment(doc=__doc__, version=VERSION)
    env.set_filename_parser(PeaksFilenameParser)

    argp = env.argument_parser
    argp.add_argument(
        '--bysummit',
        action='store_true',
        default=False,
        help=('Assume FASTA files are centered at the summit, rather than '
              'spanning start to end.'),
    )
    argp.add_argument(
        '--motif',
        required=True,
        dest='motif_file',
        help='Path to file containing motif',
    )
    argp.add_argument(
        '--motif-number',
        type=int,
        default=1,
        help='Motif number within file (e.g. 1, 2, 3) [Default is 1st]',
    )
    argp.add_argument(
        '--motif-type',
        default='MEME',
        help='motif type (see Bio.Motif for more info)',
    )
    argp.add_argument(
        '--genome',
        help='Reference genome (path to 2bit file)',
    )
    argp.set_defaults(target='analysis')

    env.do_action(action)
def main():
    """Register the MACS2 peak-calling command-line options and run ``run_macs``.

    Creates an ``Environment``, adds the genome-size, executable-path,
    subpeak, q-value and pass-through options, sets the ``target`` default to
    ``'peaks'``, installs ``BAMFilenameParser`` and the setup-file
    reader/writer, and runs ``run_macs``.
    """
    e = Environment(doc=__doc__, version=VERSION)
    parser = e.argument_parser
    parser.add_argument('-g', '--genome-size', dest='user_gsize',
                        default=None,
                        help='Optional user-specified genome size (DEFAULT: '
                             'script will try to auto-detect the genome)')
    parser.add_argument('--path-to-macs',
                        default=path_to_executable("macs2"),
                        help="optional path to macs2 executable")
    # store_false: 'subpeaks' stays True unless --no-subpeaks is given.
    parser.add_argument('--no-subpeaks', dest='subpeaks',
                        action='store_false', default=True,
                        help='do not call subpeaks with --call-summits')
    # No type= is given, so the q-value is kept as a string.
    parser.add_argument('-q', '--q-value', dest='qvalue', default='0.01',
                        help='FDR/q-value cutoff (default is 0.01)')
    # Help text fixed: the example's closing parenthesis was missing.
    parser.add_argument('--passthru-args', nargs='*',
                        help='A list of arguments to be passed through to '
                             'MACS2. Substitute + for - (e.g., '
                             '--passthru-args +m 4 50)')
    parser.set_defaults(**{'target': 'peaks'})

    e.set_filename_parser(BAMFilenameParser)
    e.set_config_reader(read_setup_file)
    e.set_config_writer(write_setup_file)
    e.do_action(run_macs)
def main():
    """Register the bowtie2 alignment command-line options and run ``align2``.

    Creates an ``Environment`` using ``BowtieFilenameParser``, overrides the
    CPU count to 1, adds the executable-path, reference, quality,
    counter-reference and pass-through options, replaces the ``references``
    and ``counter_references`` context entries with the results of
    ``validate_references`` and ``cat_counter_references``, replaces the
    environment's sequence with ``merge_pairs`` of the current one, and
    finally runs ``align2``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    e.set_filename_parser(BowtieFilenameParser)
    # let bowtie2 do the multiprocessing
    e.override_num_cpus(1)
    parser = e.argument_parser
    parser.add_argument('--path-to-bowtie2', nargs='?',
                        default=path_to_executable(
                            'bowtie2', '/usr/local/bowtie2-*',
                            environ='SOT_PATH_TO_BOWTIE2'),
                        help='The path to the bowtie2 executable')
    parser.add_argument('--path-to-samtools', nargs='?',
                        default=path_to_executable(
                            'samtools', '/usr/local/samtools*',
                            environ='SOT_PATH_TO_SAMTOOLS'),
                        help='The path to the samtools executable')
    # fix aliases, should be --ref too
    parser.add_argument('--reference', dest='references', action='append',
                        help=dedent('''\
        Reference genome to align against (either a bowtie2 index name or
        file, or a fasta file). This flag may be called multiple times
        (which will cause each reference to be aligned to separately). If
        no references are specified, we'll look for the environment
        variable SOT_DEFAULT_REFERENCES, which should be given as a list,
        e.g. "foo foo2 foo3"'''), )
    # store_false: 'use_quality' defaults to True unless the flag is given.
    parser.add_argument('--ignore-quality', dest='use_quality',
                        action='store_false',
                        help=dedent('''\
        Ignore quality scores if available. Also applies to
        counter-references if any are called'''))

    cparser = parser.add_argument_group(
        'counter-alignments',
        description=dedent('''\
        specify counter-reference genome(s)/sequence(s) to use for
        filtering out unwanted reads.'''))
    # NOTE(review): "COUTNER" in the variable name below looks like a typo,
    # but the code that reads the variable is not visible here -- confirm the
    # real name before changing the help text.
    cparser.add_argument('--counter-reference', dest='counter_references',
                         action='append',
                         help=dedent('''\
        Optional counter-reference genome/sequences to align against
        (either a bowtie2 index name or file, or a fasta file). This flag
        may be called multiple times. All counter-references will be
        concatenated into one index, and reads will be aligned in --fast
        mode. Any reads which align will be saved in a separate directory
        called 'counteraligned' and not aligned against the reference
        genomes/sequences.

        If no counter-references are specified, we'll look for the
        environment variable SOT_DEFAULT_COUTNER_REFERENCES, which should
        be given as a list, e.g. "foo foo2 foo3"'''), )
    # Help text fixed: the example's closing parenthesis was missing.
    parser.add_argument('--passthru-args', nargs='*',
                        help='A list of arguments to be passed through to '
                             'bowtie2 [alignment and counter-alignment]. '
                             'Substitute + for - (e.g., --passthru-args '
                             '+m 4 50)')

    context = e.get_context()
    new_references = validate_references(**context)
    new_counter_references = cat_counter_references(**context)
    e.update_context({'references': new_references,
                      'counter_references': new_counter_references})
    # The sequence is requested with the context captured before the update.
    sequence = e.get_sequence(**context)
    e._sequence = merge_pairs(sequence)
    e.do_action(align2)
def main():
    """Register the bowtie alignment command-line options and run ``align``.

    Creates an ``Environment`` using ``BowtieFilenameParser``, overrides the
    CPU count to 1, adds the executable-path, reference, output-folder,
    quality/mismatch and counter-reference options, replaces the
    ``references`` and ``counter_references`` context entries with the
    results of ``validate_references`` and ``cat_counter_references``, and
    finally runs ``align``.
    """
    e = Environment(version=VERSION, doc=__doc__)
    e.set_filename_parser(BowtieFilenameParser)
    # let bowtie do the multiprocessing
    e.override_num_cpus(1)
    parser = e.argument_parser
    parser.add_argument('--path-to-bowtie', nargs='?',
                        default=path_to_executable(
                            'bowtie', '/usr/local/bowtie-*',
                            environ='SOT_PATH_TO_BOWTIE'),
                        help='The path to the bowtie executable')
    parser.add_argument('--path-to-samtools', nargs='?',
                        default=path_to_executable(
                            'samtools', '/usr/local/samtools*',
                            environ='SOT_PATH_TO_SAMTOOLS'),
                        help='The path to the samtools executable')
    # fix aliases, should be --ref too
    parser.add_argument('--reference', dest='references', action='append',
                        help=dedent('''\
        Reference genome to align against (either a bowtie index name or
        file, or a fasta file). This flag may be called multiple times
        (which will cause each reference to be aligned to separately). If
        no references are specified, we'll look for the environment
        variable SOT_DEFAULT_REFERENCES, which should be given as a list,
        e.g. "foo foo2 foo3"'''), )
    # store_false flags: each dest defaults to True unless its flag is given.
    parser.add_argument('--no-unique', dest='unique', action='store_false',
                        help='do not produce unique/ alignment folder')
    parser.add_argument('--no-random', dest='random', action='store_false',
                        help='do not produce random/ alignment folder')
    parser.add_argument('--ignore-quality', dest='use_quality',
                        action='store_false',
                        help=dedent('''\
        Use -v mode with bowtie, allows only n mismatches total.

        Also applies to counter-references if any are called'''))
    parser.add_argument('--mismatches', default='2',
                        help=dedent('''\
        allow n mismatches, in the seed (default) or total if
        --ignore-quality (-v mode)'''))
    # The default used to be 'solex1.3', which is not one of the declared
    # choices; argparse does not validate defaults against ``choices``, so
    # the invalid value would have been passed through unnoticed.
    parser.add_argument('--quals-type', default='solexa1.3',
                        choices=['solexa', 'solexa1.3', 'phred64', 'phred33',
                                 'integer'],
                        help='Valid options are integer, solexa1.3, solexa, '
                             'phred33, or phred64 (see bowtie for more info)')
    parser.add_argument('--max-quality', default='70',
                        help=dedent('''\
        specify maximum quality scores of all mismatched positions
        (default is 70), ignored in --ignore-quality (-v) mode'''))
    parser.add_argument('--seed-length', dest='seed_len', default='28',
                        help='use seed length of m (default is 28)')

    cparser = parser.add_argument_group(
        'counter-alignments',
        description=dedent('''\
        specify counter-reference genome(s)/sequence(s) to use for
        filtering out unwanted reads.'''))
    # NOTE(review): "COUTNER" in the variable name below looks like a typo,
    # but the code that reads the variable is not visible here -- confirm the
    # real name before changing the help text.
    cparser.add_argument('--counter-reference', dest='counter_references',
                         action='append',
                         help=dedent('''\
        Optional counter-reference genome/sequences to align against
        (either a bowtie index name or file, or a fasta file). This flag
        may be called multiple times. All counter-references will be
        concatenated into one index, and reads will be aligned in
        --no-unique (-M 1) mode. Any reads which align will be saved in a
        separate directory called 'bad_reads' and not aligned against the
        reference genomes/sequences. If no counter-references are
        specified, we'll look for the environment variable
        SOT_DEFAULT_COUTNER_REFERENCES, which should be given as a list,
        e.g. "foo foo2 foo3"'''), )
    cparser.add_argument('--counter-mismatches', default=None,
                         help=dedent('''\
        allow n mismatches to counter-reference(s), in the seed (default)
        or total if --ignore-quality (-v mode).

        Default: same as references'''))
    cparser.add_argument('--counter-max-quality', default='70',
                         help=dedent('''\
        specify maximum quality scores of all mismatched positions when
        aligning to counter-reference(s) (default is 70), ignored in
        --ignore-quality (-v) mode'''))

    context = e.get_context()
    new_references = validate_references(**context)
    new_counter_references = cat_counter_references(**context)
    e.update_context({'references': new_references,
                      'counter_references': new_counter_references})
    e.do_action(align)