def main():
    """
    GATK Pre-processing Script

    Command-line entry point with four sub-commands:

    - ``generate-config``: write an editable config into the current working directory.
    - ``generate-manifest``: write an editable manifest into the current working directory.
    - ``generate``: write both of the above.
    - ``run``: check that the config (and the manifest, when no ``--sample`` is given)
      exists, parse the YAML config into a namespace and start the Toil workflow whose
      root job is ``download_gatk_files``.
    """
    # Define Parser object and add to jobTree
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command')
    # Generate subparsers
    subparsers.add_parser('generate-config',
                          help='Generates an editable config in the current working directory.')
    subparsers.add_parser('generate-manifest',
                          help='Generates an editable manifest in the current working directory.')
    subparsers.add_parser('generate',
                          help='Generates a config and manifest in the current working directory.')
    # Run subparser
    parser_run = subparsers.add_parser('run', help='Runs the GATK preprocessing pipeline')
    group = parser_run.add_mutually_exclusive_group(required=True)
    # NOTE(review): this default ('gatk_preprocessing.config') differs from the file name
    # written by "generate-config" below ('gatk-preprocessing.config') -- confirm which
    # spelling is intended.
    parser_run.add_argument('--config', default='gatk_preprocessing.config', type=str,
                            help='Path to the (filled in) config file, generated with "generate-config".')
    group.add_argument('--manifest', default='bwa-alignment-manifest.tsv', type=str,
                       help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                            '\nDefault value: "%(default)s".')
    # nargs must be an integer (or one of '?', '*', '+'); the string '2' is rejected by
    # argparse when the argument is registered.
    group.add_argument('--sample', default=None, nargs=2, type=str,
                       help='Space delimited sample UUID and BAM file in the format: uuid url.')
    parser_run.add_argument('--output-dir', default=None,
                            help='Full path to directory or filename where '
                                 'final results will be output')
    parser_run.add_argument('-s', '--suffix', default='.bqsr',
                            help='Additional suffix to add to the names of the output files')
    Job.Runner.addToilOptions(parser_run)
    options = parser.parse_args()

    cwd = os.getcwd()
    if options.command == 'generate-config' or options.command == 'generate':
        generate_file(os.path.join(cwd, 'gatk-preprocessing.config'), generate_config)
    if options.command == 'generate-manifest' or options.command == 'generate':
        generate_file(os.path.join(cwd, 'gatk-preprocessing-manifest.csv'), generate_manifest)
    # Pipeline execution
    elif options.command == 'run':
        require(os.path.exists(options.config),
                '{} not found. Please run '
                '"generate-config"'.format(options.config))
        if not options.sample:
            require(os.path.exists(options.manifest),
                    '{} not found and no sample provided. Please '
                    'run "generate-manifest"'.format(options.manifest))
        # Parse config. Keys are normalised from 'some-key' to 'some_key' so they can be
        # used as namespace attributes.
        # NOTE(review): yaml.load can construct arbitrary Python objects; consider
        # yaml.safe_load if config files may come from untrusted sources.
        with open(options.config) as config_file:
            raw_config = yaml.load(config_file.read())
        parsed_config = {x.replace('-', '_'): y for x, y in raw_config.iteritems()}
        inputs = argparse.Namespace(**parsed_config)
        if options.manifest:
            inputs.manifest = options.manifest
        inputs.cpu_count = multiprocessing.cpu_count()  # FIXME: should not be called from toil-leader, see #186
        inputs.memory = '15'
        # Launch Pipeline
        Job.Runner.startToil(Job.wrapJobFn(download_gatk_files, inputs, options.sample,
                                           options.output_dir, options.suffix), options)
def main():
    """
    Command-line entry point for the ADAM preprocessing pipeline.

    Sub-commands:

    - ``generate-config``: write an editable config into the current working directory.
    - ``run``: check that the config exists, parse it, validate the Spark cluster
      settings (``master_ip`` / ``num_nodes``) and the required ``dbsnp`` and ``memory``
      entries, then start the Toil workflow whose root job is
      ``static_adam_preprocessing_dag``.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command')
    # Generate subparsers
    subparsers.add_parser('generate-config',
                          help='Generates an editable config in the current working directory.')
    # Run subparser
    parser_run = subparsers.add_parser('run', help='Runs the ADAM preprocessing pipeline')
    # NOTE(review): this default ('adam_preprocessing.config') differs from the file name
    # written by "generate-config" below ('adam-preprocessing.config') -- confirm which
    # spelling is intended.
    parser_run.add_argument('--config', default='adam_preprocessing.config', type=str,
                            help='Path to the (filled in) config file, generated with "generate-config". '
                                 '\nDefault value: "%(default)s"')
    parser_run.add_argument('--sample', help='The full s3 url of the input SAM or BAM file')
    parser_run.add_argument('--output-dir', default=None,
                            help='full path where final results will be output')
    # NOTE(review): --suffix is parsed but not forwarded to the workflow below -- confirm
    # whether static_adam_preprocessing_dag should receive it.
    parser_run.add_argument('-s', '--suffix', default='',
                            help='Additional suffix to add to the names of the output files')
    Job.Runner.addToilOptions(parser_run)
    args = parser.parse_args()

    cwd = os.getcwd()
    if args.command == 'generate-config':
        generate_file(os.path.join(cwd, 'adam-preprocessing.config'), generate_config)
    # Pipeline execution
    elif args.command == 'run':
        require(os.path.exists(args.config),
                '{} not found. Please run '
                'generate-config'.format(args.config))
        # Parse config. Keys are normalised from 'some-key' to 'some_key' so they can be
        # used as namespace attributes.
        # NOTE(review): yaml.load can construct arbitrary Python objects; consider
        # yaml.safe_load if config files may come from untrusted sources.
        with open(args.config) as config_file:
            raw_config = yaml.load(config_file.read())
        parsed_config = {x.replace('-', '_'): y for x, y in raw_config.iteritems()}
        inputs = argparse.Namespace(**parsed_config)

        # Read both cluster settings defensively: a key may be absent from the config
        # or present with an empty value, and both cases mean "not provided".
        master_ip = getattr(inputs, 'master_ip', None)
        num_nodes = getattr(inputs, 'num_nodes', None)
        require(not (master_ip and num_nodes),
                'Only one of master_ip and num_nodes can be provided.')
        if not master_ip:
            require(num_nodes is not None and num_nodes > 1,
                    'num_nodes allocates one Spark/HDFS master and n-1 workers, and '
                    'thus must be greater than 1. %s was passed.' % num_nodes)

        # Report the *name* of a missing entry; its (empty) value is not informative.
        for key in ('dbsnp', 'memory'):
            require(getattr(inputs, key, None),
                    'Required argument {} missing from config'.format(key))

        Job.Runner.startToil(Job.wrapJobFn(static_adam_preprocessing_dag, inputs,
                                           args.sample, args.output_dir), args)
def main():
    """
    Computational Genomics Lab, Genomics Institute, UC Santa Cruz
    Toil BWA pipeline

    Alignment of fastq reads via BWA-kit

    General usage:
    1. Type "toil-bwa generate" to create an editable manifest and config in the current working directory.
    2. Parameterize the pipeline by editing the config.
    3. Fill in the manifest with information pertaining to your samples.
    4. Type "toil-bwa run [jobStore]" to execute the pipeline.

    Please read the README.md located in the source directory or at:
    https://github.com/BD2KGenomics/toil-scripts/tree/master/src/toil_scripts/bwa_alignment

    Structure of the BWA pipeline (per sample)

        0 --> 1

    0 = Download sample
    1 = Run BWA-kit
    ===================================================================
    :Dependencies:
    cURL:       apt-get install curl
    Toil:       pip install toil
    Docker:     wget -qO- https://get.docker.com/ | sh

    Optional:
    S3AM:       pip install --s3am (requires ~/.boto config file)
    Boto:       pip install boto
    """
    # The docstring above doubles as the command-line help text (description=main.__doc__,
    # rendered verbatim by RawTextHelpFormatter), so keep it user-facing.
    # Define Parser object and add to Toil
    parser = argparse.ArgumentParser(description=main.__doc__,
                                     formatter_class=argparse.RawTextHelpFormatter)
    subparsers = parser.add_subparsers(dest='command')
    # Generate subparsers
    subparsers.add_parser('generate-config',
                          help='Generates an editable config in the current working directory.')
    subparsers.add_parser('generate-manifest',
                          help='Generates an editable manifest in the current working directory.')
    subparsers.add_parser('generate',
                          help='Generates a config and manifest in the current working directory.')
    # Run subparser
    parser_run = subparsers.add_parser('run', help='Runs the BWA alignment pipeline')
    group = parser_run.add_mutually_exclusive_group(required=True)
    parser_run.add_argument('--config', default='config-toil-bwa.yaml', type=str,
                            help='Path to the (filled in) config file, generated with "generate-config".')
    group.add_argument('--manifest', default='manifest-toil-bwa.tsv', type=str,
                       help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                            '\nDefault value: "%(default)s".')
    group.add_argument('--sample', nargs='+', action=required_length(2, 3),
                       help='Space delimited sample UUID and fastq files in the format: uuid url1 [url2].')
    # Print docstring help if no arguments provided
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    Job.Runner.addToilOptions(parser_run)
    args = parser.parse_args()

    # Parse subparsers related to generation of config and manifest
    cwd = os.getcwd()
    if args.command == 'generate-config' or args.command == 'generate':
        generate_file(os.path.join(cwd, 'config-toil-bwa.yaml'), generate_config)
    if args.command == 'generate-manifest' or args.command == 'generate':
        generate_file(os.path.join(cwd, 'manifest-toil-bwa.tsv'), generate_manifest)
    # Pipeline execution
    elif args.command == 'run':
        require(os.path.exists(args.config),
                '{} not found. Please run generate-config'.format(args.config))
        if not args.sample:
            args.sample = None
            require(os.path.exists(args.manifest),
                    '{} not found and no sample provided. '
                    'Please run "generate-manifest"'.format(args.manifest))
        # Parse config. Keys are normalised from 'some-key' to 'some_key' so they can be
        # used as namespace attributes.
        # NOTE(review): yaml.load can construct arbitrary Python objects; consider
        # yaml.safe_load if config files may come from untrusted sources.
        with open(args.config) as config_file:
            raw_config = yaml.load(config_file.read())
        parsed_config = {x.replace('-', '_'): y for x, y in raw_config.iteritems()}
        config = argparse.Namespace(**parsed_config)
        # No --maxCores on the command line means "unbounded" (sys.maxint, Python 2).
        config.maxCores = int(args.maxCores) if args.maxCores else sys.maxint
        # A single command-line sample takes precedence over the manifest.
        samples = [args.sample] if args.sample else parse_manifest(args.manifest)
        # Sanity checks. getattr is used so that a key missing from the config yields the
        # intended message instead of an AttributeError.
        ref = getattr(config, 'ref', None)
        output_dir = getattr(config, 'output_dir', None)
        require(ref, 'Missing URL for reference file: {}'.format(ref))
        require(output_dir, 'No output location specified: {}'.format(output_dir))
        # Launch Pipeline
        Job.Runner.startToil(Job.wrapJobFn(download_reference_files, config, samples), args)
parser_run.add_argument('--config', default='gatk_germline.config', type=str, help='Path to the (filled in) config file, generated with "generate-config".') group.add_argument('--manifest', default='gatk-germline-manifest.tsv', type=str, help='Path to the (filled in) manifest file, generated with "generate-manifest". ' '\nDefault value: "%(default)s".') group.add_argument('--sample', default=None, nargs='2', type=str, help='Space delimited sample UUID and BAM file in the format: uuid url') parser_run.add_argument('--output-dir', default=None, help='Full path to directory or filename where ' 'final results will be output') parser_run.add_argument('-s', '--suffix', default='.bqsr', help='Additional suffix to add to the names of the output files') Job.Runner.addToilOptions(parser_run) options = parser.parse_args() cwd = os.getcwd() if options.command == 'generate-config' or options.command == 'generate': generate_file(os.path.join(cwd, 'gatk-preprocessing.config'), generate_config) if options.command == 'generate-manifest' or options.command == 'generate': generate_file(os.path.join(cwd, 'gatk-preprocessing-manifest.tsv'), generate_manifest) # Pipeline execution elif options.command == 'run': require(os.path.exists(options.config), '{} not found. Please run ' '"generate-config"'.format(options.config)) if not options.sample: require(os.path.exists(options.manifest), '{} not found and no sample provided. Please ' 'run "generate-manifest"'.format(options.manifest)) # Parse config parsed_config = {x.replace('-', '_'): y for x, y in yaml.load(open(options.config).read()).iteritems()} inputs = argparse.Namespace(**parsed_config) if options.manifest: inputs.manifest = options.manifest
def main():
    """
    This is a Toil pipeline used to perform alignment of fastqs.

    Command-line entry point with four sub-commands:

    - ``generate-config`` / ``generate-manifest`` / ``generate``: write an editable
      config and/or manifest into the current working directory. The file names depend
      on whether mock mode is active.
    - ``run``: check that the config and manifest exist, parse both, validate the
      cluster and pipeline settings, then start the Toil workflow whose root job is
      ``sample_loop``.

    :raises ValueError: if both ``master_ip`` and ``num_nodes`` are set, if neither a
        ``master_ip`` nor a ``num_nodes`` greater than 1 is set, or if
        ``pipeline_to_run`` is not one of 'adam', 'gatk' or 'both'.
    """
    # Define Parser object and add to Toil
    in_mock_mode = mock_mode()
    if in_mock_mode:
        usage_msg = 'You have the TOIL_SCRIPTS_MOCK_MODE environment variable set, so this pipeline ' \
                    'will run in mock mode. To disable mock mode, set TOIL_SCRIPTS_MOCK_MODE=0'
    else:
        usage_msg = None

    parser = argparse.ArgumentParser(usage=usage_msg)
    subparsers = parser.add_subparsers(dest='command')
    subparsers.add_parser('generate-config',
                          help='Generates an editable config in the current working directory.')
    subparsers.add_parser('generate-manifest',
                          help='Generates an editable manifest in the current working directory.')
    subparsers.add_parser('generate',
                          help='Generates a config and manifest in the current working directory.')
    # Run subparser
    parser_run = subparsers.add_parser('run', help='Runs the ADAM/GATK pipeline')
    # The same names serve as argparse defaults and as the generated file names, so the
    # output of "generate" is picked up by "run" without extra flags.
    default_config = 'adam-gatk-mock.config' if in_mock_mode else 'adam-gatk.config'
    default_manifest = 'adam-gatk-mock-manifest.csv' if in_mock_mode else 'adam-gatk-manifest.csv'
    parser_run.add_argument('--config', default=default_config, type=str,
                            help='Path to the (filled in) config file, generated with "generate-config".')
    parser_run.add_argument('--manifest', default=default_manifest, type=str,
                            help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                                 '\nDefault value: "%(default)s".')
    Job.Runner.addToilOptions(parser_run)
    args = parser.parse_args()

    cwd = os.getcwd()
    if args.command == 'generate-config' or args.command == 'generate':
        generate_file(os.path.join(cwd, default_config), generate_config)
    if args.command == 'generate-manifest' or args.command == 'generate':
        generate_file(os.path.join(cwd, default_manifest), generate_manifest)
    # Pipeline execution
    elif args.command == 'run':
        require(os.path.exists(args.config),
                '{} not found. Please run '
                'generate-config'.format(args.config))
        # This parser defines no --sample option, so the manifest is the only source of
        # samples and must always exist.
        require(os.path.exists(args.manifest),
                '{} not found and no samples provided. Please '
                'run "generate-manifest"'.format(args.manifest))
        # Parse config. Keys are normalised from 'some-key' to 'some_key' so they can be
        # used as namespace attributes.
        # NOTE(review): yaml.load can construct arbitrary Python objects; consider
        # yaml.safe_load if config files may come from untrusted sources.
        with open(args.config) as config_file:
            raw_config = yaml.load(config_file.read())
        parsed_config = {x.replace('-', '_'): y for x, y in raw_config.iteritems()}
        inputs = argparse.Namespace(**parsed_config)

        # Parse manifest file: one entry per line, skipping blank lines and '#' comments.
        with open(args.manifest) as f_manifest:
            uuid_list = [line.strip() for line in f_manifest
                         if not line.isspace() and not line.startswith('#')]

        inputs.sort = False
        # Optional string settings default to '' when absent or empty.
        if not getattr(inputs, 'dir_suffix', None):
            inputs.dir_suffix = ''
        if not getattr(inputs, 's3_bucket', None):
            inputs.s3_bucket = ''

        # Read both cluster settings defensively: a key may be absent from the config
        # or present with an empty value, and both cases mean "not provided".
        master_ip = getattr(inputs, 'master_ip', None)
        num_nodes = getattr(inputs, 'num_nodes', None)
        if master_ip and num_nodes:
            raise ValueError("Exactly one of master_ip (%s) and num_nodes (%d) must be provided." %
                             (master_ip, num_nodes))
        if not master_ip and (num_nodes is None or num_nodes <= 1):
            raise ValueError('num_nodes allocates one Spark/HDFS master and n-1 workers, and thus must be greater '
                             'than 1. %s was passed.' % num_nodes)

        pipeline_to_run = getattr(inputs, 'pipeline_to_run', None)
        if pipeline_to_run not in ('adam', 'gatk', 'both'):
            raise ValueError("pipeline_to_run must be either 'adam', 'gatk', or 'both'. %s was passed." %
                             pipeline_to_run)

        Job.Runner.startToil(Job.wrapJobFn(sample_loop, uuid_list, inputs), args)