def main():
    """
    GATK Pre-processing Script

    Command-line entry point. Builds an argparse parser with four
    sub-commands:

    * ``generate-config``   - write an editable config into the current directory
    * ``generate-manifest`` - write an editable manifest into the current directory
    * ``generate``          - write both of the above
    * ``run``               - validate the inputs and start the Toil workflow whose
                              root job is ``download_gatk_files``

    Takes no arguments (reads ``sys.argv`` through argparse) and returns nothing.
    Validation failures are reported through ``require`` (defined elsewhere in
    the project).
    """
    # Define Parser object and add to Toil
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command')
    # Generate subparsers
    subparsers.add_parser('generate-config', help='Generates an editable config in the current working directory.')
    subparsers.add_parser('generate-manifest', help='Generates an editable manifest in the current working directory.')
    subparsers.add_parser('generate', help='Generates a config and manifest in the current working directory.')
    # Run subparser
    parser_run = subparsers.add_parser('run', help='Runs the GATK preprocessing pipeline')
    group = parser_run.add_mutually_exclusive_group(required=True)
    parser_run.add_argument('--config', default='gatk_preprocessing.config', type=str,
                            help='Path to the (filled in) config file, generated with "generate-config".')
    group.add_argument('--manifest', default='bwa-alignment-manifest.tsv', type=str,
                       help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                            '\nDefault value: "%(default)s".')
    # nargs must be an int: the string '2' makes argparse reject the argument
    # definition before any command line is parsed.
    group.add_argument('--sample', default=None, nargs=2, type=str,
                       help='Space delimited sample UUID and BAM file in the format: uuid url.')
    parser_run.add_argument('--output-dir', default=None, help='Full path to directory or filename where '
                                                               'final results will be output')
    parser_run.add_argument('-s', '--suffix', default='.bqsr',
                            help='Additional suffix to add to the names of the output files')
    Job.Runner.addToilOptions(parser_run)
    options = parser.parse_args()

    cwd = os.getcwd()
    # NOTE(review): the file names generated here differ from the --config /
    # --manifest defaults declared above; confirm which spelling is intended.
    if options.command in ('generate-config', 'generate'):
        generate_file(os.path.join(cwd, 'gatk-preprocessing.config'), generate_config)
    if options.command in ('generate-manifest', 'generate'):
        generate_file(os.path.join(cwd, 'gatk-preprocessing-manifest.csv'), generate_manifest)

    # Pipeline execution
    elif options.command == 'run':
        require(os.path.exists(options.config), '{} not found. Please run '
                                                '"generate-config"'.format(options.config))
        if not options.sample:
            require(os.path.exists(options.manifest), '{} not found and no sample provided. Please '
                                                      'run "generate-manifest"'.format(options.manifest))
        # Parse config; the context manager closes the file handle deterministically.
        # NOTE(review): yaml.load can construct arbitrary objects; consider
        # yaml.safe_load if the config may come from an untrusted source.
        with open(options.config) as config_file:
            raw_config = yaml.load(config_file.read())
        # Config keys use dashes; attribute names need underscores.
        parsed_config = {key.replace('-', '_'): value for key, value in raw_config.iteritems()}
        inputs = argparse.Namespace(**parsed_config)
        if options.manifest:
            inputs.manifest = options.manifest

        # Everything below needs ``inputs``, which only exists for the 'run'
        # command, so it must stay inside this branch.
        inputs.cpu_count = multiprocessing.cpu_count()  # FIXME: should not be called from toil-leader, see #186
        inputs.memory = '15'

        # Launch Pipeline
        Job.Runner.startToil(Job.wrapJobFn(download_gatk_files, inputs, options.sample,
                                           options.output_dir, options.suffix), options)
def main():
    """
    ADAM preprocessing pipeline entry point.

    Builds an argparse parser with two sub-commands:

    * ``generate-config`` - write an editable config into the current directory
    * ``run``             - validate the config and start the Toil workflow whose
                            root job is ``static_adam_preprocessing_dag``

    Takes no arguments (reads ``sys.argv`` through argparse) and returns nothing.
    Validation failures are reported through ``require`` (defined elsewhere in
    the project).
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command')
    # Generate subparsers
    subparsers.add_parser('generate-config', help='Generates an editable config in the current working directory.')
    # Run subparser
    parser_run = subparsers.add_parser('run', help='Runs the ADAM preprocessing pipeline')
    parser_run.add_argument('--config', default='adam_preprocessing.config', type=str,
                            help='Path to the (filled in) config file, generated with "generate-config". '
                                 '\nDefault value: "%(default)s"')
    parser_run.add_argument('--sample', help='The full s3 url of the input SAM or BAM file')
    parser_run.add_argument('--output-dir', default=None,
                            help='full path where final results will be output')
    parser_run.add_argument('-s', '--suffix', default='',
                            help='Additional suffix to add to the names of the output files')

    Job.Runner.addToilOptions(parser_run)
    args = parser.parse_args()
    cwd = os.getcwd()
    if args.command == 'generate-config':
        generate_file(os.path.join(cwd, 'adam-preprocessing.config'), generate_config)
    # Pipeline execution
    elif args.command == 'run':
        require(os.path.exists(args.config), '{} not found. Please run '
                                             'generate-config'.format(args.config))
        # Parse config; the context manager closes the file handle deterministically.
        # NOTE(review): yaml.load can construct arbitrary objects; consider
        # yaml.safe_load if the config may come from an untrusted source.
        with open(args.config) as config_file:
            raw_config = yaml.load(config_file.read())
        # Config keys use dashes; attribute names need underscores.
        parsed_config = {key.replace('-', '_'): value for key, value in raw_config.iteritems()}
        inputs = argparse.Namespace(**parsed_config)

        # Read both settings tolerantly so a missing key is treated like an
        # empty one instead of raising AttributeError before validation runs.
        master_ip = getattr(inputs, 'master_ip', None)
        num_nodes = getattr(inputs, 'num_nodes', None)

        require(not (master_ip and num_nodes),
                'Only one of master_ip and num_nodes can be provided.')

        # Without an existing master the pipeline must size its own cluster.
        if not master_ip:
            require(num_nodes is not None and num_nodes > 1,
                    'num_nodes allocates one Spark/HDFS master and n-1 workers, and '
                    'thus must be greater than 1. %s was passed.' % num_nodes)

        # Report the *name* of the missing setting, not its (empty) value.
        for name in ('dbsnp', 'memory'):
            require(getattr(inputs, name, None),
                    'Required argument {} missing from config'.format(name))

        # Launch once, after every check has passed.
        Job.Runner.startToil(Job.wrapJobFn(static_adam_preprocessing_dag, inputs,
                                           args.sample, args.output_dir), args)
def main():
    """
    Computational Genomics Lab, Genomics Institute, UC Santa Cruz
    Toil BWA pipeline

    Alignment of fastq reads via BWA-kit

    General usage:
    1. Type "toil-bwa generate" to create an editable manifest and config in the current working directory.
    2. Parameterize the pipeline by editing the config.
    3. Fill in the manifest with information pertaining to your samples.
    4. Type "toil-bwa run [jobStore]" to execute the pipeline.

    Please read the README.md located in the source directory or at:
    https://github.com/BD2KGenomics/toil-scripts/tree/master/src/toil_scripts/bwa_alignment

    Structure of the BWA pipeline (per sample)

        0 --> 1

    0 = Download sample
    1 = Run BWA-kit
    ===================================================================
    :Dependencies:
    cURL:       apt-get install curl
    Toil:       pip install toil
    Docker:     wget -qO- https://get.docker.com/ | sh

    Optional:
    S3AM:       pip install --s3am (requires ~/.boto config file)
    Boto:       pip install boto
    """
    # The docstring above doubles as the CLI help text (passed as ``description``),
    # so editing it changes what users see on ``--help``.
    # Define Parser object and add to Toil
    parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=argparse.RawTextHelpFormatter)
    subparsers = parser.add_subparsers(dest='command')
    # Generate subparsers
    subparsers.add_parser('generate-config', help='Generates an editable config in the current working directory.')
    subparsers.add_parser('generate-manifest', help='Generates an editable manifest in the current working directory.')
    subparsers.add_parser('generate', help='Generates a config and manifest in the current working directory.')
    # Run subparser
    parser_run = subparsers.add_parser('run', help='Runs the BWA alignment pipeline')
    group = parser_run.add_mutually_exclusive_group(required=True)
    parser_run.add_argument('--config', default='config-toil-bwa.yaml', type=str,
                            help='Path to the (filled in) config file, generated with "generate-config".')
    group.add_argument('--manifest', default='manifest-toil-bwa.tsv', type=str,
                       help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                            '\nDefault value: "%(default)s".')
    group.add_argument('--sample', nargs='+', action=required_length(2, 3),
                       help='Space delimited sample UUID and fastq files in the format: uuid url1 [url2].')
    # Print docstring help if no arguments provided
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    Job.Runner.addToilOptions(parser_run)
    args = parser.parse_args()
    # Parse subparsers related to generation of config and manifest
    cwd = os.getcwd()
    if args.command in ('generate-config', 'generate'):
        generate_file(os.path.join(cwd, 'config-toil-bwa.yaml'), generate_config)
    if args.command in ('generate-manifest', 'generate'):
        generate_file(os.path.join(cwd, 'manifest-toil-bwa.tsv'), generate_manifest)
    # Pipeline execution
    elif args.command == 'run':
        require(os.path.exists(args.config), '{} not found. Please run generate-config'.format(args.config))
        if not args.sample:
            # Normalise any falsy value to None so the manifest path is used below.
            args.sample = None
            require(os.path.exists(args.manifest), '{} not found and no sample provided. '
                                                   'Please run "generate-manifest"'.format(args.manifest))
        # Parse config; the context manager closes the file handle deterministically.
        # NOTE(review): yaml.load can construct arbitrary objects; consider
        # yaml.safe_load if the config may come from an untrusted source.
        with open(args.config) as config_file:
            raw_config = yaml.load(config_file.read())
        # Config keys use dashes; attribute names need underscores.
        parsed_config = {key.replace('-', '_'): value for key, value in raw_config.iteritems()}
        config = argparse.Namespace(**parsed_config)
        # No explicit core limit means "unbounded".
        config.maxCores = int(args.maxCores) if args.maxCores else sys.maxint
        samples = [args.sample] if args.sample else parse_manifest(args.manifest)
        # Sanity checks: use getattr so an absent key yields the intended
        # message rather than an AttributeError.
        ref = getattr(config, 'ref', None)
        output_dir = getattr(config, 'output_dir', None)
        require(ref, 'Missing URL for reference file: {}'.format(ref))
        require(output_dir, 'No output location specified: {}'.format(output_dir))
        # Launch Pipeline
        Job.Runner.startToil(Job.wrapJobFn(download_reference_files, config, samples), args)
Example #4
0
    # NOTE(review): this is a fragment; the enclosing function header and the
    # definitions of `parser`, `parser_run` and `group` are not visible here.
    parser_run.add_argument('--config', default='gatk_germline.config', type=str,
                            help='Path to the (filled in) config file, generated with "generate-config".')
    group.add_argument('--manifest', default='gatk-germline-manifest.tsv', type=str,
                       help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                            '\nDefault value: "%(default)s".')
    # NOTE(review): nargs is given as the string '2' rather than the integer 2;
    # argparse documents an integer count here, so this likely needs fixing.
    group.add_argument('--sample', default=None, nargs='2', type=str,
                       help='Space delimited sample UUID and BAM file in the format: uuid url')
    parser_run.add_argument('--output-dir', default=None, help='Full path to directory or filename where '
                                                               'final results will be output')    
    parser_run.add_argument('-s', '--suffix', default='.bqsr', help='Additional suffix to add to the names of the output files')
    # Let Toil register its own command-line options on the 'run' sub-parser.
    Job.Runner.addToilOptions(parser_run)
    options = parser.parse_args()

    cwd = os.getcwd()
    # 'generate' triggers both generators; the file names written here
    # ('gatk-preprocessing.*') differ from the --config / --manifest defaults
    # declared above ('gatk_germline.config', 'gatk-germline-manifest.tsv').
    if options.command == 'generate-config' or options.command == 'generate':
        generate_file(os.path.join(cwd, 'gatk-preprocessing.config'), generate_config)
    if options.command == 'generate-manifest' or options.command == 'generate':
        generate_file(os.path.join(cwd, 'gatk-preprocessing-manifest.tsv'), generate_manifest)

    # Pipeline execution
    # This `elif` pairs with the second `if` only.
    elif options.command == 'run':
        require(os.path.exists(options.config), '{} not found. Please run '
                                             '"generate-config"'.format(options.config))
        # The manifest only has to exist when no single sample was given.
        if not options.sample:
            require(os.path.exists(options.manifest), '{} not found and no sample provided. Please '
                                                       'run "generate-manifest"'.format(options.manifest))
        # Parse config
        # Dashes in config keys become underscores so they can be used as
        # attribute names on the Namespace built below.
        # NOTE(review): the file opened here is never explicitly closed, and
        # yaml.load is used rather than yaml.safe_load; confirm the config is trusted.
        parsed_config = {x.replace('-', '_'): y for x, y in yaml.load(open(options.config).read()).iteritems()}
        inputs = argparse.Namespace(**parsed_config)
        # --manifest has a non-empty default, so this is true unless the user
        # explicitly passes an empty value.
        if options.manifest:
            inputs.manifest = options.manifest
def main():
    """
    Command-line entry point for the combined ADAM/GATK Toil pipeline.

    Builds an argparse parser with four sub-commands:

    * ``generate-config``   - write an editable config into the current directory
    * ``generate-manifest`` - write an editable manifest into the current directory
    * ``generate``          - write both of the above
    * ``run``               - validate config and manifest, then start the Toil
                              workflow whose root job is ``sample_loop``

    When ``mock_mode()`` is true, the default config/manifest names switch to
    their ``-mock`` variants and the usage text says so.

    Takes no arguments (reads ``sys.argv`` through argparse) and returns nothing.

    :raises ValueError: if both ``master_ip`` and ``num_nodes`` are set, if
        neither a master nor more than one node is configured, or if
        ``pipeline_to_run`` is not one of 'adam', 'gatk', 'both'.
    """
    # Define Parser object and add to Toil
    mocking = mock_mode()
    if mocking:
        usage_msg = 'You have the TOIL_SCRIPTS_MOCK_MODE environment variable set, so this pipeline ' \
                    'will run in mock mode. To disable mock mode, set TOIL_SCRIPTS_MOCK_MODE=0'
    else:
        usage_msg = None

    parser = argparse.ArgumentParser(usage=usage_msg)
    subparsers = parser.add_subparsers(dest='command')
    subparsers.add_parser('generate-config', help='Generates an editable config in the current working directory.')
    subparsers.add_parser('generate-manifest', help='Generates an editable manifest in the current working directory.')
    subparsers.add_parser('generate', help='Generates a config and manifest in the current working directory.')
    # Run subparser
    parser_run = subparsers.add_parser('run', help='Runs the ADAM/GATK pipeline')
    default_config = 'adam-gatk-mock.config' if mocking else 'adam-gatk.config'
    default_manifest = 'adam-gatk-mock-manifest.csv' if mocking else 'adam-gatk-manifest.csv'
    parser_run.add_argument('--config', default=default_config, type=str,
                            help='Path to the (filled in) config file, generated with "generate-config".')
    parser_run.add_argument('--manifest', default=default_manifest,
                            type=str, help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                                           '\nDefault value: "%(default)s".')
    Job.Runner.addToilOptions(parser_run)
    args = parser.parse_args()

    cwd = os.getcwd()
    if args.command in ('generate-config', 'generate'):
        generate_file(os.path.join(cwd, default_config), generate_config)
    if args.command in ('generate-manifest', 'generate'):
        generate_file(os.path.join(cwd, default_manifest), generate_manifest)
    # Pipeline execution
    elif args.command == 'run':
        require(os.path.exists(args.config), '{} not found. Please run '
                                             'generate-config'.format(args.config))
        # NOTE(review): this parser defines no --sample option, so unless the
        # Toil options add one this check always runs.
        if not hasattr(args, 'sample'):
            require(os.path.exists(args.manifest), '{} not found and no samples provided. Please '
                                                   'run "generate-manifest"'.format(args.manifest))
        # Parse config; the context manager closes the file handle deterministically.
        # NOTE(review): yaml.load can construct arbitrary objects; consider
        # yaml.safe_load if the config may come from an untrusted source.
        with open(args.config) as config_file:
            raw_config = yaml.load(config_file.read())
        # Config keys use dashes; attribute names need underscores.
        parsed_config = {key.replace('-', '_'): value for key, value in raw_config.iteritems()}
        inputs = argparse.Namespace(**parsed_config)

        # Parse manifest file: one entry per line, skipping blanks and '#' comments.
        with open(args.manifest) as f_manifest:
            uuid_list = [line.strip() for line in f_manifest
                         if not line.isspace() and not line.startswith('#')]

        inputs.sort = False
        # Optional settings default to the empty string whether the key is
        # absent or present-but-empty.
        if not getattr(inputs, 'dir_suffix', None):
            inputs.dir_suffix = ''
        if not getattr(inputs, 's3_bucket', None):
            inputs.s3_bucket = ''

        # Read both settings tolerantly so a missing key is treated like an
        # empty one instead of raising AttributeError before validation runs.
        master_ip = getattr(inputs, 'master_ip', None)
        num_nodes = getattr(inputs, 'num_nodes', None)

        if master_ip and num_nodes:
            raise ValueError("Exactly one of master_ip (%s) and num_nodes (%d) must be provided." %
                             (master_ip, num_nodes))

        # Without an existing master the pipeline must size its own cluster.
        if not master_ip and (num_nodes is None or num_nodes <= 1):
            raise ValueError('num_nodes allocates one Spark/HDFS master and n-1 workers, and thus must be greater '
                             'than 1. %s was passed.' % num_nodes)

        if inputs.pipeline_to_run not in ('adam', 'gatk', 'both'):
            raise ValueError("pipeline_to_run must be either 'adam', 'gatk', or 'both'. %s was passed." % inputs.pipeline_to_run)

        Job.Runner.startToil(Job.wrapJobFn(sample_loop, uuid_list, inputs), args)