def addOptions(parser):
    """
    Adds toil options to a parser object, either optparse or argparse.
    """
    # Wrapper function that allows toil to be used with both the optparse and
    # argparse option parsing modules
    addLoggingOptions(parser)  # This adds the logging stuff.
    if isinstance(parser, OptionContainer):
        def addGroup(headingString, bodyString):
            group = OptionGroup(parser, headingString, bodyString)
            parser.add_option_group(group)
            return group.add_option
        _addOptions(addGroup, "%default")
    elif isinstance(parser, ArgumentParser):
        def addGroup(headingString, bodyString):
            return parser.add_argument_group(headingString, bodyString).add_argument
        _addOptions(addGroup, "%(default)s")
    else:
        raise RuntimeError("Unanticipated class passed to addOptions(), %s. Expecting "
                           "either optparse.OptionParser or argparse.ArgumentParser"
                           % parser.__class__)
def addOptions(parser, config=Config()):
    """
    Adds toil options to an argparse.ArgumentParser object.
    """
    # Only argparse is supported in this version; the Config object is passed
    # through to _addOptions.
    addLoggingOptions(parser)  # This adds the logging stuff.
    if isinstance(parser, ArgumentParser):
        def addGroup(headingString, bodyString):
            return parser.add_argument_group(headingString, bodyString).add_argument
        _addOptions(addGroup, config)
    else:
        raise RuntimeError("Unanticipated class passed to addOptions(), %s. Expecting "
                           "argparse.ArgumentParser" % parser.__class__)
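# Usage sketch for addOptions() above; an assumption-laden example. It
# presumes the helpers addOptions depends on (_addOptions, addLoggingOptions)
# are importable alongside it, and that addLoggingOptions registers toil's
# logging flags such as --logLevel.
from argparse import ArgumentParser

def _example_addOptions_usage():
    parser = ArgumentParser(description='Example Toil workflow')
    addOptions(parser)  # registers toil's option groups on this parser
    # Parse a hypothetical command line; --logLevel is assumed to be one of
    # the flags addLoggingOptions registers.
    options = parser.parse_args(['--logLevel', 'INFO'])
    return options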
def main():
    """
    Computational Genomics Lab, Genomics Institute, UC Santa Cruz
    Dockerized Toil RNA-seq pipeline

    RNA-seq fastqs are combined, aligned, and quantified with 2 different methods (RSEM and Kallisto)

    General Usage:
    docker run -v $(pwd):$(pwd) -v /var/run/docker.sock:/var/run/docker.sock \
                quay.io/ucsc_cgl/rnaseq-cgl-pipeline --samples sample1.tar

    Please see the complete documentation located at:
    https://github.com/BD2KGenomics/cgl-docker-lib/tree/master/rnaseq-cgl-pipeline
    or inside the container at: /opt/rnaseq-pipeline/README.md


    Structure of RNA-Seq Pipeline (per sample)

                  3 -- 4 -- 5
                 /          |
    0 -- 1 -- 2 ---- 6 -- 8
                 \          |
                  7 ---------

    0 = Download sample
    1 = Unpack/Merge fastqs
    2 = CutAdapt (adapter trimming)
    3 = STAR Alignment
    4 = RSEM Quantification
    5 = RSEM Post-processing
    6 = Kallisto
    7 = FastQC
    8 = Consolidate output and upload to S3
    =======================================
    Dependencies
    Docker
    """
    # Define argument parser
    parser = argparse.ArgumentParser(description=main.__doc__,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--sample-tar', default=[], action='append',
                        help='Absolute path to sample tarball.')
    parser.add_argument('--sample-single', default=[], action='append',
                        help='Absolute path to sample single-end FASTQ.')
    parser.add_argument('--sample-paired', nargs='*', default=[],
                        help='Absolute path to sample paired FASTQs, in the form '
                             '`read1,read2,read1,read2`.')
    parser.add_argument('--output-basenames', nargs='*', default=[],
                        help='Base names to use for naming the output files.')
    parser.add_argument('--star', type=str, default='',
                        help='Absolute path to STAR index tarball.')
    parser.add_argument('--rsem', type=str, default='',
                        help='Absolute path to RSEM reference tarball.')
    parser.add_argument('--kallisto', type=str, default='',
                        help='Absolute path to Kallisto index (.idx) file.')
    parser.add_argument('--hera', type=str, default='',
                        help='Absolute path to Hera index (.idx) file.')
    parser.add_argument('--disable-cutadapt', action='store_true', default=False,
                        help='Cutadapt fails if samples are improperly paired. '
                             'Use this flag to disable cutadapt.')
    parser.add_argument('--save-bam', action='store_true', default=False,
                        help='If this flag is used, genome-aligned bam is written to output.')
    parser.add_argument('--save-wiggle', action='store_true', default=False,
                        help='If this flag is used, wiggle files (.bg) are written to output.')
    parser.add_argument('--no-clean', action='store_true',
                        help='If this flag is used, temporary work directory is not cleaned.')
    parser.add_argument('--resume', type=str, default=None,
                        help='Pass the working directory that contains a job store to be resumed.')
    parser.add_argument('--cores', type=int, default=None,
                        help='Will set a cap on number of cores to use, default is all available cores.')
    parser.add_argument('--bamqc', action='store_true', default=None,
                        help='Enable BAM QC step. Disabled by default.')
    parser.add_argument('--work_mount', required=True,
                        help='Mount where intermediate files should be written. This directory '
                             'should be mirror mounted into the container.')
    parser.add_argument('--max-sample-size', default='20G',
                        help='Maximum size of sample file using Toil resource requirements '
                             "syntax, e.g. '20G'. Standard suffixes like K, Ki, M, Mi, G or Gi are supported.")
    auto_scale_options = parser.add_argument_group('Auto-scaling options')
    auto_scale_options.add_argument('--auto-scale', action='store_true', default=False,
                                    help='Enable Toil autoscaling. Disabled by default.')
    auto_scale_options.add_argument('--cluster-name', default='',
                                    help='Name of the Toil cluster. Usually the security group name.')
    auto_scale_options.add_argument('--job-store',
                                    default='aws:us-west-2:autoscaling-toil-rnaseq-jobstore-2',
                                    help='Directory in cloud where working files will be put; '
                                         'e.g. aws:us-west-2:autoscaling-toil-rnaseq-jobstore')
    auto_scale_options.add_argument('--output-location',
                                    default='s3://toil-rnaseq-cloud-staging-area',
                                    help='Directory in cloud where output files will be put; '
                                         'e.g. s3://toil-rnaseq-cloud-staging-area')
    auto_scale_options.add_argument('--provisioner', default='aws',
                                    help='Cloud provisioner to use, e.g. aws')
    auto_scale_options.add_argument('--node-type', default='c3.8xlarge',
                                    help='Cloud worker VM type, e.g. c3.8xlarge')
    auto_scale_options.add_argument('--max-nodes', type=int, default=2,
                                    help='Maximum worker nodes to launch, e.g. 2')
    auto_scale_options.add_argument('--credentials-id', default='',
                                    help='Credentials id')
    auto_scale_options.add_argument('--credentials-secret-key', default='',
                                    help='Credentials secret key')
    # Although we don't actually set the log level in this module, the option
    # is propagated to toil. For this reason we want the logging options to
    # show up when we run --help.
    addLoggingOptions(parser)
    toilLoggingOption = '--logDebug'
    for arg in sys.argv:
        if 'log' in arg:
            toilLoggingOption = arg
            sys.argv.remove(toilLoggingOption)
            break
    args = parser.parse_args()
    args.toilLoggingOption = toilLoggingOption
    # If no arguments provided, print full help menu
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    if args.auto_scale:
        if not args.cluster_name:
            log.info('Auto-scaling requires a cluster name to be input with the --cluster-name option')
            parser.error('Auto-scaling requires a cluster name to be input with the --cluster-name option')
        if not args.credentials_id or not args.credentials_secret_key:
            log.info('Auto-scaling requires provisioner credentials id and secret key')
            parser.error('Auto-scaling requires provisioner credentials id and secret key')
    # Get name of most recent running container. If socket is mounted, should be this one.
    try:
        name = subprocess.check_output(['docker', 'ps', '--format', '{{.Names}}']).split('\n')[0]
    except subprocess.CalledProcessError as e:
        raise RuntimeError('No container detected, ensure Docker is being run with: '
                           '"-v /var/run/docker.sock:/var/run/docker.sock" as an argument. \n\n{}'.format(str(e)))
    # Get name of mounted volume
    blob = json.loads(subprocess.check_output(['docker', 'inspect', name]))
    mounts = blob[0]['Mounts']
    # Ensure docker.sock is mounted correctly
    sock_mount = [x['Source'] == x['Destination'] for x in mounts if 'docker.sock' in x['Source']]
    require(len(sock_mount) == 1, 'Missing socket mount. Requires the following: '
                                  'docker run -v /var/run/docker.sock:/var/run/docker.sock')
    work_mount = args.work_mount
    for samples in [args.sample_tar, args.sample_paired, args.sample_single]:
        if not samples:
            continue
        # Enforce file input standards
        if args.auto_scale:
            require(len(args.output_basenames) == len(samples),
                    'There must be a unique output filename for each sample. '
                    'You provided {}'.format(args.output_basenames))
            require(all((x.lower().startswith(('http://', 's3://', 'ftp://')) or not x)
                        for x in samples),
                    "Sample inputs must point to a file's full path, "
                    "e.g. 's3://full/path/to/sample_R1.fastq.gz', and should start with "
                    "file://, http://, s3://, or ftp://. You provided %s", str(samples))
        else:
            # If a sample is given as a relative path, assume it's in the work directory
            if not all(x.startswith('/') for x in samples):
                samples = [x if x.startswith('/') else os.path.join(work_mount, x)
                           for x in samples]
                log.info('\nSample given as relative path, assuming sample is in '
                         'work directory: {}'.format(work_mount))
            require(all(x.startswith('/') for x in samples),
                    "Sample inputs must point to a file's full path, "
                    "e.g. '/full/path/to/sample1.tar'. You provided %s", str(samples))
        if samples == args.sample_tar:
            log.info('TARs to run: {}'.format('\t'.join(args.sample_tar)))
        if samples == args.sample_paired:
            log.info('Paired FASTQs to run: {}'.format('\t'.join(args.sample_paired)))
        if samples == args.sample_single:
            log.info('Single FASTQs to run: {}'.format('\t'.join(args.sample_single)))
    # Reference file paths should start with /, file://, http://, s3://, or ftp://
    if args.auto_scale:
        require(all((x.lower().startswith(('http://', 's3://', 'ftp://')) or not x)
                    for x in [args.star, args.kallisto, args.rsem, args.hera]),
                "Reference inputs must point to a file's full path, "
                "e.g. 's3://full/path/to/kallisto_hg38.idx', and should start with "
                "file://, http://, s3://, or ftp://.")
    else:
        # Input for star and rsem will be empty if the user wants to run
        # kallisto only, so empty strings are allowed (`not x`).
        require(all((x.startswith('/') or not x)
                    for x in [args.star, args.kallisto, args.rsem, args.hera]),
                "Reference inputs must point to a file's full path, "
                "e.g. '/full/path/to/kallisto_hg38.idx'")
    # Output log information
    log.info('The work mount is: {}'.format(work_mount))
    log.info('Pipeline input locations: \n{}\n{}\n{}\n{}'.format(
        args.star, args.rsem, args.kallisto, args.hera))
    call_pipeline(work_mount, args)
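# The require() helper called throughout main() above is not defined in this
# excerpt. A minimal sketch of the assumed behavior, modeled on toil_lib's
# require (raise when a precondition fails, with printf-style message args);
# the UserError name is a stand-in for whatever error type the real helper
# raises.
class UserError(Exception):
    pass

def require(expression, message, *args):
    # Raise UserError with a formatted message when `expression` is falsy,
    # e.g. require(len(sock_mount) == 1, 'Missing socket mount...').
    if not expression:
        raise UserError(message % args if args else message)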
    parser.add_argument('--cores', type=int, default=None,
                        help='Will set a cap on number of cores to use, default is all available cores.')
    parser.add_argument('--bamqc', action='store_true', default=None,
                        help='Enable BAM QC step. Disabled by default.')
    # parser.add_argument('--autoscaler', action='store_true', default=None,
    #                     help='If set, automatically scale the cluster for this run.')
    parser.add_argument('--work_mount', required=True,
                        help='Mount where intermediate files should be written. This directory '
                             'should be mirror mounted into the container.')
    # Although we don't actually set the log level in this module, the option
    # is propagated to toil. For this reason we want the logging options to
    # show up when we run --help.
    addLoggingOptions(parser)
    toilLoggingOption = None
    for arg in sys.argv:
        if 'log' in arg:
            toilLoggingOption = arg
            sys.argv.remove(toilLoggingOption)
            break
    args = parser.parse_args()
    args.toilLoggingOption = toilLoggingOption
    # If no arguments provided, print full help menu
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    # Get name of most recent running container. If socket is mounted, should be this one.
    try:
        name = subprocess.check_output(['docker', 'ps', '--format', '{{.Names}}']).split('\n')[0]
    except subprocess.CalledProcessError as e:
        raise RuntimeError('No container detected, ensure Docker is being run with: '
                           '"-v /var/run/docker.sock:/var/run/docker.sock" as an argument. \n\n{}'.format(str(e)))
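# Illustration of the sys.argv scan above: the first argument containing the
# substring 'log' is pulled out before argparse runs, so toil logging flags
# pass through to toil instead of tripping this parser. Note the substring
# match is loose: any argument containing 'log' (e.g. a file path) would be
# captured. The command line below is hypothetical.
def _example_logging_option_scan():
    argv = ['rnaseq-cgl-pipeline', '--logInfo', '--work_mount', '/data']
    toilLoggingOption = None
    for arg in argv:
        if 'log' in arg:
            toilLoggingOption = arg
            argv.remove(toilLoggingOption)
            break
    assert toilLoggingOption == '--logInfo'
    assert argv == ['rnaseq-cgl-pipeline', '--work_mount', '/data']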
def main():
    """
    Computational Genomics Lab, Genomics Institute, UC Santa Cruz
    Dockerized Toil RNA-seq pipeline

    RNA-seq fastqs are combined, aligned, and quantified with 2 different methods (RSEM and Kallisto)

    General Usage:
    docker run -v $(pwd):$(pwd) -v /var/run/docker.sock:/var/run/docker.sock \
                quay.io/ucsc_cgl/rnaseq-cgl-pipeline --samples sample1.tar

    Please see the complete documentation located at:
    https://github.com/BD2KGenomics/cgl-docker-lib/tree/master/rnaseq-cgl-pipeline
    or inside the container at: /opt/rnaseq-pipeline/README.md


    Structure of RNA-Seq Pipeline (per sample)

                  3 -- 4 -- 5
                 /          |
    0 -- 1 -- 2 ---- 6 -- 8
                 \          |
                  7 ---------

    0 = Download sample
    1 = Unpack/Merge fastqs
    2 = CutAdapt (adapter trimming)
    3 = STAR Alignment
    4 = RSEM Quantification
    5 = RSEM Post-processing
    6 = Kallisto
    7 = FastQC
    8 = Consolidate output and upload to S3
    =======================================
    Dependencies
    Docker
    """
    # Define argument parser
    parser = argparse.ArgumentParser(description=main.__doc__,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--sample-tar', default=[], action='append',
                        help='Absolute path to sample tarball.')
    parser.add_argument('--sample-single', default=[], action='append',
                        help='Absolute path to sample single-end FASTQ.')
    parser.add_argument('--sample-paired', default=[], action='append',
                        help='Absolute path to sample paired FASTQs, in the form '
                             '`read1,read2,read1,read2`.')
    parser.add_argument('--star', type=str, required=True,
                        help='Absolute path to STAR index tarball.')
    parser.add_argument('--rsem', type=str, required=True,
                        help='Absolute path to RSEM reference tarball.')
    parser.add_argument('--kallisto', type=str, required=True,
                        help='Absolute path to Kallisto index (.idx) file.')
    parser.add_argument('--disable-cutadapt', action='store_true', default=False,
                        help='Cutadapt fails if samples are improperly paired. '
                             'Use this flag to disable cutadapt.')
    parser.add_argument('--save-bam', action='store_true', default=False,
                        help='If this flag is used, genome-aligned bam is written to output.')
    parser.add_argument('--save-wiggle', action='store_true', default=False,
                        help='If this flag is used, wiggle files (.bg) are written to output.')
    parser.add_argument('--no-clean', action='store_true',
                        help='If this flag is used, temporary work directory is not cleaned.')
    parser.add_argument('--resume', type=str, default=None,
                        help='Pass the working directory that contains a job store to be resumed.')
    parser.add_argument('--cores', type=int, default=None,
                        help='Will set a cap on number of cores to use, default is all available cores.')
    parser.add_argument('--bamqc', action='store_true', default=None,
                        help='Enable BAM QC step. Disabled by default.')
    parser.add_argument('--work_mount', required=True,
                        help='Mount where intermediate files should be written. This directory '
                             'should be mirror mounted into the container.')
    parser.add_argument('--output-basename', default='',
                        help='Base name to use for naming the output files.')
    # Although we don't actually set the log level in this module, the option
    # is propagated to toil. For this reason we want the logging options to
    # show up when we run --help.
    addLoggingOptions(parser)
    toilLoggingOption = None
    for arg in sys.argv:
        if 'log' in arg:
            toilLoggingOption = arg
            sys.argv.remove(toilLoggingOption)
            break
    args = parser.parse_args()
    args.toilLoggingOption = toilLoggingOption
    # If no arguments provided, print full help menu
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    # Get name of most recent running container. If socket is mounted, should be this one.
    try:
        name = subprocess.check_output(['docker', 'ps', '--format', '{{.Names}}']).split('\n')[0]
    except subprocess.CalledProcessError as e:
        raise RuntimeError('No container detected, ensure Docker is being run with: '
                           '"-v /var/run/docker.sock:/var/run/docker.sock" as an argument. \n\n{}'.format(str(e)))
    # Get name of mounted volume
    blob = json.loads(subprocess.check_output(['docker', 'inspect', name]))
    mounts = blob[0]['Mounts']
    # Ensure docker.sock is mounted correctly
    sock_mount = [x['Source'] == x['Destination'] for x in mounts if 'docker.sock' in x['Source']]
    require(len(sock_mount) == 1, 'Missing socket mount. Requires the following: '
                                  'docker run -v /var/run/docker.sock:/var/run/docker.sock')
    work_mount = args.work_mount
    # Create work_mount directories if they don't exist yet.
    cmd = ['mkdir', '-p', work_mount]
    log.info('Creating directory: %s', work_mount)
    subprocess.call(cmd)
    curr_mount = os.path.join(os.getcwd(), work_mount)
    cmd = ['mkdir', '-p', curr_mount]
    log.info('Creating directory: %s', curr_mount)
    subprocess.call(cmd)
    for samples in [args.sample_tar, args.sample_paired, args.sample_single]:
        if not samples:
            continue
        # If a sample is given as a relative path, assume it's in the work directory
        if not all(x.startswith('/') for x in samples):
            samples = [x if x.startswith('/') else os.path.join(work_mount, x)
                       for x in samples]
            log.info('\nSample given as relative path, assuming sample is in '
                     'work directory: {}'.format(work_mount))
        # Enforce file input standards
        require(all(x.startswith('/') for x in samples),
                "Sample inputs must point to a file's full path, "
                "e.g. '/full/path/to/sample1.tar'. You provided %s", str(samples))
        if samples == args.sample_tar:
            log.info('TARs to run: {}'.format('\t'.join(args.sample_tar)))
        if samples == args.sample_paired:
            log.info('Paired FASTQs to run: {}'.format('\t'.join(args.sample_paired)))
        if samples == args.sample_single:
            log.info('Single FASTQs to run: {}'.format('\t'.join(args.sample_single)))
    require(all(x.startswith('/') for x in [args.star, args.kallisto, args.rsem]),
            "Reference inputs must point to a file's full path, "
            "e.g. '/full/path/to/kallisto_hg38.idx'.")
    # Output log information
    log.info('The work mount is: {}'.format(work_mount))
    log.info('Pipeline input locations: \n{}\n{}\n{}'.format(args.star, args.rsem, args.kallisto))
    call_pipeline(work_mount, args)
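# Illustration of the docker.sock mount check used in both main() variants
# above, with a hand-written stand-in for the Mounts array that
# `docker inspect` returns (the values are made up). Note that the require()
# call in main() only checks that exactly one docker.sock mount exists; the
# Source == Destination booleans collected here are not themselves asserted.
def _example_sock_mount_check():
    mounts = [
        {'Source': '/var/run/docker.sock', 'Destination': '/var/run/docker.sock'},
        {'Source': '/home/user/data', 'Destination': '/home/user/data'},
    ]
    sock_mount = [x['Source'] == x['Destination']
                  for x in mounts if 'docker.sock' in x['Source']]
    assert len(sock_mount) == 1  # exactly one mirror-mounted docker.sock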