Example no. 1
0
def addOptions(parser):
    """
    Add Toil options to a parser object, either optparse or argparse.

    :param parser: an optparse.OptionContainer or an argparse.ArgumentParser
        that should receive Toil's command-line options.
    :raises RuntimeError: if the parser is of an unsupported type.
    """
    # Wrapper function that allows toil to be used with both the optparse and
    # argparse option parsing modules
    addLoggingOptions(parser)  # This adds the logging stuff.
    if isinstance(parser, OptionContainer):

        def addGroup(headingString, bodyString):
            # optparse requires the group to be registered with the parser
            # explicitly before options can be added to it.
            group = OptionGroup(parser, headingString, bodyString)
            parser.add_option_group(group)
            return group.add_option

        # "%default" is optparse's default-value interpolation token.
        _addOptions(addGroup, "%default")
    elif isinstance(parser, ArgumentParser):

        def addGroup(headingString, bodyString):
            return parser.add_argument_group(headingString,
                                             bodyString).add_argument

        # "%(default)s" is argparse's default-value interpolation token.
        _addOptions(addGroup, "%(default)s")
    else:
        raise RuntimeError(
            "Unanticipated class passed to addOptions(), %s. Expecting "
            "Either optparse.OptionParser or argparse.ArgumentParser" %
            parser.__class__)
Example no. 2
0
def addOptions(parser, config=None):
    """
    Add Toil options to an argparse parser object.

    :param parser: an argparse.ArgumentParser to receive the options.
    :param config: a Config object supplying option defaults. When omitted, a
        fresh Config is created per call. (The previous ``config=Config()``
        default was evaluated once at import time, so all callers silently
        shared — and could mutate — a single Config instance.)
    :raises RuntimeError: if the parser is not an ArgumentParser.
    """
    if config is None:
        config = Config()
    addLoggingOptions(parser) # This adds the logging stuff.
    if isinstance(parser, ArgumentParser):
        def addGroup(headingString, bodyString):
            return parser.add_argument_group(headingString, bodyString).add_argument
        _addOptions(addGroup, config)
    else:
        raise RuntimeError("Unanticipated class passed to addOptions(), %s. Expecting "
                           "argparse.ArgumentParser" % parser.__class__)
Example no. 3
0
def addOptions(parser, config=None):
    """
    Add Toil options to an argparse parser object.

    :param parser: an argparse.ArgumentParser to receive the options.
    :param config: a Config object supplying option defaults. When omitted, a
        fresh Config is created per call. (The previous ``config=Config()``
        default was evaluated once at import time, so all callers silently
        shared — and could mutate — a single Config instance.)
    :raises RuntimeError: if the parser is not an ArgumentParser.
    """
    if config is None:
        config = Config()
    addLoggingOptions(parser) # This adds the logging stuff.
    if isinstance(parser, ArgumentParser):
        def addGroup(headingString, bodyString):
            return parser.add_argument_group(headingString, bodyString).add_argument
        _addOptions(addGroup, config)
    else:
        raise RuntimeError("Unanticipated class passed to addOptions(), %s. Expecting "
                           "argparse.ArgumentParser" % parser.__class__)
Example no. 4
0
def addOptions(parser):
    """
    Add Toil options to a parser object, either optparse or argparse.

    :param parser: an optparse.OptionContainer or an argparse.ArgumentParser
        that should receive Toil's command-line options.
    :raises RuntimeError: if the parser is of an unsupported type.
    """
    # Wrapper function that allows toil to be used with both the optparse and
    # argparse option parsing modules
    addLoggingOptions(parser) # This adds the logging stuff.
    if isinstance(parser, OptionContainer):
        def addGroup(headingString, bodyString):
            # optparse requires the group to be registered with the parser
            # explicitly before options can be added to it.
            group = OptionGroup(parser, headingString, bodyString)
            parser.add_option_group(group)
            return group.add_option

        # "%default" is optparse's default-value interpolation token.
        _addOptions(addGroup, "%default")
    elif isinstance(parser, ArgumentParser):
        def addGroup(headingString, bodyString):
            return parser.add_argument_group(headingString, bodyString).add_argument

        # "%(default)s" is argparse's default-value interpolation token.
        _addOptions(addGroup, "%(default)s")
    else:
        raise RuntimeError("Unanticipated class passed to addOptions(), %s. Expecting "
                           "Either optparse.OptionParser or argparse.ArgumentParser" % parser.__class__)
Example no. 5
0
def main():
    """
    Computational Genomics Lab, Genomics Institute, UC Santa Cruz
    Dockerized Toil RNA-seq pipeline

    RNA-seq fastqs are combined, aligned, and quantified with 2 different methods (RSEM and Kallisto)

    General Usage:
    docker run -v $(pwd):$(pwd) -v /var/run/docker.sock:/var/run/docker.sock \
    quay.io/ucsc_cgl/rnaseq-cgl-pipeline --samples sample1.tar

    Please see the complete documentation located at:
    https://github.com/BD2KGenomics/cgl-docker-lib/tree/master/rnaseq-cgl-pipeline
    or inside the container at: /opt/rnaseq-pipeline/README.md


    Structure of RNA-Seq Pipeline (per sample)

                  3 -- 4 -- 5
                 /          |
      0 -- 1 -- 2 ---- 6 -- 8
                 \          |
                  7 ---------

    0 = Download sample
    1 = Unpack/Merge fastqs
    2 = CutAdapt (adapter trimming)
    3 = STAR Alignment
    4 = RSEM Quantification
    5 = RSEM Post-processing
    6 = Kallisto
    7 = FastQC
    8 = Consolidate output and upload to S3
    =======================================
    Dependencies
    Docker
    """
    # Define the argument parser. RawTextHelpFormatter preserves the ASCII
    # pipeline diagram in the docstring when shown as the --help description.
    parser = argparse.ArgumentParser(
        description=main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--sample-tar',
                        default=[],
                        action="append",
                        help='Absolute path to sample tarball.')
    parser.add_argument('--sample-single',
                        default=[],
                        action="append",
                        help='Absolute path to sample single-ended FASTQ.')
    parser.add_argument(
        '--sample-paired',
        nargs='*',
        default=[],
        help=
        'Absolute path to sample paired FASTQs, in the form `read1,read2,read1,read2`.'
    )
    parser.add_argument('--output-basenames',
                        nargs='*',
                        default=[],
                        help='Base names to use for naming the output files ')

    parser.add_argument('--star',
                        type=str,
                        default="",
                        help='Absolute path to STAR index tarball.')
    parser.add_argument('--rsem',
                        type=str,
                        default="",
                        help='Absolute path to rsem reference tarball.')
    parser.add_argument('--kallisto',
                        type=str,
                        default="",
                        help='Absolute path to kallisto index (.idx) file.')
    parser.add_argument('--hera',
                        type=str,
                        default="",
                        help='Absolute path to hera index (.idx) file.')
    parser.add_argument(
        '--disable-cutadapt',
        action='store_true',
        default=False,
        help=
        'Cutadapt fails if samples are improperly paired. Use this flag to disable cutadapt.'
    )
    parser.add_argument(
        '--save-bam',
        action='store_true',
        # BUGFIX: was default='false' -- a truthy *string*, which made the
        # flag effectively always on. store_true flags must default to False.
        default=False,
        help='If this flag is used, genome-aligned bam is written to output.')
    parser.add_argument(
        '--save-wiggle',
        action='store_true',
        # BUGFIX: was the truthy string 'false' (see --save-bam above).
        default=False,
        help='If this flag is used, wiggle files (.bg) are written to output.')
    parser.add_argument(
        '--no-clean',
        action='store_true',
        help='If this flag is used, temporary work directory is not cleaned.')
    parser.add_argument(
        '--resume',
        type=str,
        default=None,
        help=
        'Pass the working directory that contains a job store to be resumed.')
    parser.add_argument(
        '--cores',
        type=int,
        default=None,
        help=
        'Will set a cap on number of cores to use, default is all available cores.'
    )
    parser.add_argument('--bamqc',
                        action='store_true',
                        default=None,
                        help='Enable BAM QC step. Disabled by default')
    parser.add_argument(
        '--work_mount',
        required=True,
        help='Mount where intermediate files should be written. This directory '
        'should be mirror mounted into the container.')
    parser.add_argument(
        '--max-sample-size',
        default="20G",
        help='Maximum size of sample file using Toil resource requirements '
        "syntax, e.g '20G'. Standard suffixes like K, Ki, M, Mi, G or Gi are supported."
    )

    auto_scale_options = parser.add_argument_group('Auto-scaling options')
    auto_scale_options.add_argument(
        '--auto-scale',
        action='store_true',
        default=False,
        help='Enable Toil autoscaling. Disabled by default')
    auto_scale_options.add_argument(
        '--cluster-name',
        default="",
        help='Name of the Toil cluster. Usually the security group name')
    auto_scale_options.add_argument(
        '--job-store',
        default="aws:us-west-2:autoscaling-toil-rnaseq-jobstore-2",
        help='Directory in cloud where working files will be put; '
        'e.g. aws:us-west-2:autoscaling-toil-rnaseq-jobstore')
    auto_scale_options.add_argument(
        '--output-location',
        default="s3://toil-rnaseq-cloud-staging-area",
        help='Directory in cloud where  output files will be put; '
        'e.g. s3://toil-rnaseq-cloud-staging-area')
    auto_scale_options.add_argument('--provisioner',
                                    default="aws",
                                    help='Cloud provisioner to use. E.g aws')
    auto_scale_options.add_argument(
        '--node-type',
        default="c3.8xlarge",
        help='Cloud worker VM type; e.g. c3.8xlarge')
    auto_scale_options.add_argument(
        '--max-nodes',
        type=int,
        default=2,
        help='Maximum worker nodes to launch. E.g. 2')
    auto_scale_options.add_argument('--credentials-id',
                                    default="",
                                    help='Credentials id')
    auto_scale_options.add_argument('--credentials-secret-key',
                                    default="",
                                    help='Credentials secret key')

    # Although we don't actually set the log level in this module, the option
    # is propagated to toil. For this reason we want the logging options to
    # show up when we run --help.
    addLoggingOptions(parser)
    # Pull any toil logging option out of argv so argparse does not reject it;
    # default to debug logging when none was supplied.
    toilLoggingOption = '--logDebug'
    for arg in sys.argv:
        if 'log' in arg:
            toilLoggingOption = arg
            sys.argv.remove(toilLoggingOption)
            break
    # If no arguments provided, print full help menu. BUGFIX: this must run
    # BEFORE parse_args(), which would otherwise abort first with an error
    # about the missing required --work_mount argument.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    args.toilLoggingOption = toilLoggingOption

    if args.auto_scale:
        if not args.cluster_name:
            log.info(
                'Auto-scaling requires a cluster name to be input with the --cluster-name option'
            )
            parser.error(
                'Auto-scaling requires a cluster name to be input with the --cluster-name option'
            )
        if not args.credentials_id or not args.credentials_secret_key:
            log.info(
                'Auto-scaling requires provisioner credentials id and secret key'
            )
            parser.error(
                'Auto-scaling requires provisioner credentials id and secret key'
            )

    # Get name of most recent running container. If socket is mounted, should be this one.
    try:
        name = subprocess.check_output(
            ['docker', 'ps', '--format', '{{.Names}}']).split('\n')[0]
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            'No container detected, ensure Docker is being run with: '
            '"-v /var/run/docker.sock:/var/run/docker.sock" as an argument. \n\n{}'
            # BUGFIX: e.message does not exist on Python 3 exceptions;
            # formatting the exception itself works on both Python 2 and 3.
            .format(e))
    # Get name of mounted volume
    blob = json.loads(subprocess.check_output(['docker', 'inspect', name]))
    mounts = blob[0]['Mounts']
    # Ensure docker.sock is mounted correctly (host path == container path)
    sock_mount = [
        x['Source'] == x['Destination'] for x in mounts
        if 'docker.sock' in x['Source']
    ]
    require(
        len(sock_mount) == 1, 'Missing socket mount. Requires the following: '
        'docker run -v /var/run/docker.sock:/var/run/docker.sock')
    work_mount = args.work_mount
    for samples in [args.sample_tar, args.sample_paired, args.sample_single]:
        if not samples:
            continue

        # Enforce file input standards
        if args.auto_scale:
            require(
                len(args.output_basenames) == len(samples), "There must be a "
                "unique output filename for each sample. You provided {}".
                format(args.output_basenames))

            require(all( ((x.lower().startswith('http://') or x.lower().startswith('s3://') \
                or x.lower().startswith('ftp://')) or not x) for x in samples),
            "Sample inputs must point to a file's full path, "
            "e.g. 's3://full/path/to/sample_R1.fastq.gz', and should start with "
            " file://, http://, s3://, or ftp://.  You provided %s", str(samples))
        else:
            # If sample is given as relative path, assume it's in the work directory
            if not all(x.startswith('/') for x in samples):
                samples = [
                    os.path.join(work_mount, x) for x in samples
                    if not x.startswith('/')
                ]
                log.info(
                    '\nSample given as relative path, assuming sample is in work directory: {}'
                    # BUGFIX: was work_mount[0], which logged only the first
                    # character of the mount path.
                    .format(work_mount))

            require(
                all(x.startswith('/') for x in samples),
                "Sample inputs must point to a file's full path, "
                "e.g. '/full/path/to/sample1.tar'. You provided %s",
                str(samples))
        if samples == args.sample_tar:
            log.info('TARs to run: {}'.format('\t'.join(args.sample_tar)))
        if samples == args.sample_paired:
            log.info('Paired FASTQS to run: {}'.format('\t'.join(
                args.sample_paired)))
        if samples == args.sample_single:
            log.info('Single FASTQS to run: {}'.format('\t'.join(
                args.sample_single)))

    # File paths should start with /, file://, http://, s3://, or ftp://
    if args.auto_scale:
        require(all( ((x.lower().startswith('http://') or x.lower().startswith('s3://') \
                or x.lower().startswith('ftp://')) or not x) for x in [args.star, \
                             args.kallisto, args.rsem, args.hera]),
            "Sample inputs must point to a file's full path, "
            "e.g. 's3://full/path/to/kallisto_hg38.idx', and should start with file://, http://, s3://, or ftp://.")
    else:
        # Input for star and rsem will be empty if user wants to run kallisto
        # only, so also accept empty strings ("not x").
        require(
            all((x.startswith('/') or not x)
                for x in [args.star, args.kallisto, args.rsem, args.hera]),
            "Sample inputs must point to a file's full path, "
            "e.g. '/full/path/to/kallisto_hg38.idx'")

    # Output log information
    log.info('The work mount is: {}'.format(work_mount))
    log.info('Pipeline input locations: \n{}\n{}\n{}\n{}'.format(
        args.star, args.rsem, args.kallisto, args.hera))
    call_pipeline(work_mount, args)
Example no. 6
0
                        help='Will set a cap on number of cores to use, default is all available cores.')
    parser.add_argument('--bamqc', action='store_true', default=None,
                        help='Enable BAM QC step. Disabled by default')

/*
    parser.add_arguement('--autoscaler', action='store_true' default=None, 
                            help='If it is true then it will automatically cluster the program')
*/


    parser.add_argument('--work_mount', required=True,
                        help='Mount where intermediate files should be written. This directory '
                             'should be mirror mounted into the container.')
    # although we don't actually set the log level in this module, the option is propagated to toil. For this reason
    # we want the logging options to show up with we run --help
    addLoggingOptions(parser)
    toilLoggingOption = None
    for arg in sys.argv:
        if 'log' in arg:
            toilLoggingOption = arg
            sys.argv.remove(toilLoggingOption)
            break
    args = parser.parse_args()
    args.toilLoggingOption = toilLoggingOption
    # If no arguments provided, print full help menu
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    # Get name of most recent running container. If socket is mounted, should be this one.
    try:
        name = subprocess.check_output(['docker', 'ps', '--format', '{{.Names}}']).split('\n')[0]
Example no. 7
0
def main():
    """
    Computational Genomics Lab, Genomics Institute, UC Santa Cruz
    Dockerized Toil RNA-seq pipeline

    RNA-seq fastqs are combined, aligned, and quantified with 2 different methods (RSEM and Kallisto)

    General Usage:
    docker run -v $(pwd):$(pwd) -v /var/run/docker.sock:/var/run/docker.sock \
    quay.io/ucsc_cgl/rnaseq-cgl-pipeline --samples sample1.tar

    Please see the complete documentation located at:
    https://github.com/BD2KGenomics/cgl-docker-lib/tree/master/rnaseq-cgl-pipeline
    or inside the container at: /opt/rnaseq-pipeline/README.md


    Structure of RNA-Seq Pipeline (per sample)

                  3 -- 4 -- 5
                 /          |
      0 -- 1 -- 2 ---- 6 -- 8
                 \          |
                  7 ---------

    0 = Download sample
    1 = Unpack/Merge fastqs
    2 = CutAdapt (adapter trimming)
    3 = STAR Alignment
    4 = RSEM Quantification
    5 = RSEM Post-processing
    6 = Kallisto
    7 = FastQC
    8 = Consolidate output and upload to S3
    =======================================
    Dependencies
    Docker
    """
    # Define the argument parser. RawTextHelpFormatter preserves the ASCII
    # pipeline diagram in the docstring when shown as the --help description.
    parser = argparse.ArgumentParser(
        description=main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--sample-tar',
                        default=[],
                        action="append",
                        help='Absolute path to sample tarball.')
    parser.add_argument('--sample-single',
                        default=[],
                        action="append",
                        help='Absolute path to sample single-ended FASTQ.')
    parser.add_argument(
        '--sample-paired',
        default=[],
        action="append",
        help=
        'Absolute path to sample paired FASTQs, in the form `read1,read2,read1,read2`.'
    )
    parser.add_argument('--star',
                        type=str,
                        required=True,
                        help='Absolute path to STAR index tarball.')
    parser.add_argument('--rsem',
                        type=str,
                        required=True,
                        help='Absolute path to rsem reference tarball.')
    parser.add_argument('--kallisto',
                        type=str,
                        required=True,
                        help='Absolute path to kallisto index (.idx) file.')
    parser.add_argument(
        '--disable-cutadapt',
        action='store_true',
        default=False,
        help=
        'Cutadapt fails if samples are improperly paired. Use this flag to disable cutadapt.'
    )
    parser.add_argument(
        '--save-bam',
        action='store_true',
        # BUGFIX: was default='false' -- a truthy *string*, which made the
        # flag effectively always on. store_true flags must default to False.
        default=False,
        help='If this flag is used, genome-aligned bam is written to output.')
    parser.add_argument(
        '--save-wiggle',
        action='store_true',
        # BUGFIX: was the truthy string 'false' (see --save-bam above).
        default=False,
        help='If this flag is used, wiggle files (.bg) are written to output.')
    parser.add_argument(
        '--no-clean',
        action='store_true',
        help='If this flag is used, temporary work directory is not cleaned.')
    parser.add_argument(
        '--resume',
        type=str,
        default=None,
        help=
        'Pass the working directory that contains a job store to be resumed.')
    parser.add_argument(
        '--cores',
        type=int,
        default=None,
        help=
        'Will set a cap on number of cores to use, default is all available cores.'
    )
    parser.add_argument('--bamqc',
                        action='store_true',
                        default=None,
                        help='Enable BAM QC step. Disabled by default')
    parser.add_argument(
        '--work_mount',
        required=True,
        help='Mount where intermediate files should be written. This directory '
        'should be mirror mounted into the container.')
    parser.add_argument('--output-basename',
                        default="",
                        help='Base name to use for naming the output files ')
    # Although we don't actually set the log level in this module, the option
    # is propagated to toil. For this reason we want the logging options to
    # show up when we run --help.
    addLoggingOptions(parser)
    # Pull any toil logging option out of argv so argparse does not reject it.
    toilLoggingOption = None
    for arg in sys.argv:
        if 'log' in arg:
            toilLoggingOption = arg
            sys.argv.remove(toilLoggingOption)
            break
    # If no arguments provided, print full help menu. BUGFIX: this must run
    # BEFORE parse_args(), which would otherwise abort first with an error
    # about the missing required arguments (--star, --work_mount, ...).
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    args.toilLoggingOption = toilLoggingOption
    # Get name of most recent running container. If socket is mounted, should be this one.
    try:
        name = subprocess.check_output(
            ['docker', 'ps', '--format', '{{.Names}}']).split('\n')[0]
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            'No container detected, ensure Docker is being run with: '
            '"-v /var/run/docker.sock:/var/run/docker.sock" as an argument. \n\n{}'
            # BUGFIX: e.message does not exist on Python 3 exceptions;
            # formatting the exception itself works on both Python 2 and 3.
            .format(e))
    # Get name of mounted volume
    blob = json.loads(subprocess.check_output(['docker', 'inspect', name]))
    mounts = blob[0]['Mounts']
    # Ensure docker.sock is mounted correctly (host path == container path)
    sock_mount = [
        x['Source'] == x['Destination'] for x in mounts
        if 'docker.sock' in x['Source']
    ]
    require(
        len(sock_mount) == 1, 'Missing socket mount. Requires the following: '
        'docker run -v /var/run/docker.sock:/var/run/docker.sock')
    work_mount = args.work_mount
    # Create work_mount directories if they don't exist yet.
    cmd = ["mkdir", "-p", work_mount]
    log.info('Creating directory: %s', work_mount)
    subprocess.call(cmd)
    curr_mount = os.path.join(os.getcwd(), work_mount)
    cmd = ["mkdir", "-p", curr_mount]
    log.info('Creating directory: %s', curr_mount)
    subprocess.call(cmd)

    for samples in [args.sample_tar, args.sample_paired, args.sample_single]:
        if not samples:
            continue
        # If sample is given as relative path, assume it's in the work directory
        if not all(x.startswith('/') for x in samples):
            samples = [
                os.path.join(work_mount, x) for x in samples
                if not x.startswith('/')
            ]
            log.info(
                '\nSample given as relative path, assuming sample is in work directory: {}'
                # BUGFIX: was work_mount[0], which logged only the first
                # character of the mount path.
                .format(work_mount))
        # Enforce file input standards
        require(
            all(x.startswith('/') for x in samples),
            "Sample inputs must point to a file's full path, "
            "e.g. '/full/path/to/sample1.tar'. You provided %s", str(samples))
        if samples == args.sample_tar:
            log.info('TARs to run: {}'.format('\t'.join(args.sample_tar)))
        if samples == args.sample_paired:
            log.info('Paired FASTQS to run: {}'.format('\t'.join(
                args.sample_paired)))
        if samples == args.sample_single:
            log.info('Single FASTQS to run: {}'.format('\t'.join(
                args.sample_single)))
    require(
        all(x.startswith('/') for x in [args.star, args.kallisto, args.rsem]),
        "Sample inputs must point to a file's full path, "
        "e.g. '/full/path/to/kallisto_hg38.idx'.")
    # Output log information
    log.info('The work mount is: {}'.format(work_mount))
    log.info('Pipeline input locations: \n{}\n{}\n{}'.format(
        args.star, args.rsem, args.kallisto))
    call_pipeline(work_mount, args)