Ejemplo n.º 1
0
 def test_add_runner_option_with_argparse(self):
     """'--runner' is accepted by an ArgumentParser after add_runner_option

     Parses '--runner SimpleJobRunner' and checks that the supplied
     value ends up on the 'runner' attribute of the parsed arguments.
     """
     parser = ArgumentParser()
     add_runner_option(parser)
     parsed = parser.parse_args(['--runner', 'SimpleJobRunner'])
     self.assertEqual(parsed.runner, 'SimpleJobRunner')
Ejemplo n.º 2
0
 def test_add_runner_option(self):
     """'--runner' is accepted by an OptionParser after add_runner_option

     Parses '--runner SimpleJobRunner' and checks that the supplied
     value ends up on the 'runner' attribute of the parsed options.
     The test is skipped when OPTPARSE_AVAILABLE is false.
     """
     # Nothing to check if optparse can't be used
     if not OPTPARSE_AVAILABLE:
         raise unittest.SkipTest("'optparse' not available")
     parser = OptionParser()
     add_runner_option(parser)
     parsed_options, _positional = parser.parse_args(
         ['--runner', 'SimpleJobRunner'])
     self.assertEqual(parsed_options.runner, 'SimpleJobRunner')
def add_analyse_barcodes_command(cmdparser):
    """Create a parser for the 'analyse_barcodes' command

    Registers the 'analyse_barcodes' subcommand on 'cmdparser' and
    attaches its options: the bcl-to-fastq output directory, lane
    selection, barcode grouping (mismatches) and reporting (cutoff)
    parameters, sample sheet and output directory overrides, the
    shared runner/debug options, and a group of deprecated options
    which are still accepted but documented as doing nothing.

    Arguments:
      cmdparser: command parser object; must provide an
        'add_command' method returning a parser which supports
        'add_option' and 'add_option_group' (presumably an
        optparse.OptionParser, as it is used as the parent of an
        optparse.OptionGroup below).
    """
    p = cmdparser.add_command('analyse_barcodes',help="Analyse index (barcode) sequences",
                              usage="%prog analyse_barcodes [OPTIONS] [ANALYSIS_DIR]",
                              version="%prog "+__version__,
                              description="Analyse barcode sequences for fastq files "
                              "in specified lanes in ANALYSIS_DIR, and report the most "
                              "common barcodes found across all reads from each lane.")
    p.add_option('--unaligned-dir',action='store',
                 dest='unaligned_dir',default='bcl2fastq',
                 help="explicitly set the (sub)directory with bcl-to-fastq outputs")
    p.add_option('--lanes',action='store',
                 dest='lanes',default=None,
                 help="specify which lanes to analyse barcodes for (default is to do "
                 "analysis for all lanes).")
    p.add_option('--mismatches',action='store',dest='mismatches',
                 default=0,type='int',
                 help="maximum number of mismatches to use when grouping "
                 "similar barcodes (default is 0, i.e. no grouping)")
    # CUTOFF is a fraction (not a percentage): the default of 0.001
    # corresponds to 0.1% of reads, and the help text must say so
    p.add_option('--cutoff',action='store',dest='cutoff',
                 default=0.001,type='float',
                 help="exclude barcodes with a smaller fraction of "
                 "associated reads than CUTOFF, e.g. '0.01' excludes "
                 "barcodes with < 1% of reads (default is 0.1%)")
    p.add_option('--sample-sheet',action="store",
                 dest="sample_sheet",default=None,
                 help="use an alternative sample sheet to the default "
                 "'custom_SampleSheet.csv' created on setup.")
    p.add_option('--barcode-analysis-dir',action="store",
                 dest="barcode_analysis_dir",default=None,
                 help="specify subdirectory where barcode analysis will "
                 "be performed and outputs will be written")
    add_runner_option(p)
    add_debug_option(p)
    # Deprecated options: still parsed so that existing command lines
    # keep working, but advertised as having no effect
    deprecated = optparse.OptionGroup(p,'Deprecated/defunct options')
    deprecated.add_option('--nprocessors',action='store',
                          dest='nprocessors',default=None,type='int',
                          help="does nothing; kept for backwards "
                          "compatibility only")
    # NB '--truncate' is stored under 'length', not 'truncate'
    deprecated.add_option('--truncate',action='store',
                          dest='length',default=None,type='int',
                          help="does nothing; kept for backwards "
                          "compatibility only")
    p.add_option_group(deprecated)
def add_update_fastq_stats_command(cmdparser):
    """Set up the parser for the 'update_fastq_stats' command

    Registers the subcommand on 'cmdparser' and adds options for the
    bcl-to-fastq output directory and the two statistics output
    files, followed by the shared nprocessors, runner and debug
    options.

    Arguments:
      cmdparser: command parser object providing 'add_command'
    """
    parser = cmdparser.add_command(
        'update_fastq_stats',
        help="(Re)generate Fastq statistics",
        usage="%prog update_fastq_stats [OPTIONS] [ANALYSIS_DIR]",
        description="(Re)generate statistics for fastq "
        "files produced from 'make_fastqs'.")
    # Plain 'store' options as (flag, dest, default, help text)
    store_options = (
        ('--unaligned-dir','unaligned_dir','bcl2fastq',
         "explicitly set the (sub)directory with bcl-to-fastq outputs"),
        ('--stats-file','stats_file',None,
         "specify output file for fastq statistics"),
        ('--per-lane-stats-file','per_lane_stats_file',None,
         "specify output file for per-lane statistics"),
    )
    for flag,dest,default_value,help_text in store_options:
        parser.add_option(flag,action='store',
                          dest=dest,default=default_value,
                          help=help_text)
    # Default number of processors is taken from the settings
    add_nprocessors_option(parser,__settings.fastq_stats.nprocessors)
    add_runner_option(parser)
    add_debug_option(parser)
def add_run_qc_command(cmdparser):
    """Set up the parser for the 'run_qc' command

    Registers the subcommand on 'cmdparser' and adds options for
    selecting projects/samples, the fastq_screen subset size,
    decompression of Fastqs and the maximum number of concurrent
    jobs, followed by the shared runner, modulefiles and debug
    options and a group of deprecated options.

    Arguments:
      cmdparser: command parser object providing 'add_command'
    """
    parser = cmdparser.add_command(
        'run_qc',
        help="Run QC procedures",
        usage="%prog run_qc [OPTIONS] [ANALYSIS_DIR]",
        description="Run QC procedures for sequencing projects in "
        "ANALYSIS_DIR.")
    # Defaults which are also reported in the help text
    max_concurrent_jobs = __settings.general.max_concurrent_jobs
    fastq_screen_subset = 1000000
    # Project/sample selection
    projects_help = (
        "simple wildcard-based pattern specifying a subset of projects "
        "and samples to run the QC on. PROJECT_PATTERN should be of the form "
        "'pname[/sname]', where 'pname' specifies a project (or set of "
        "projects) and 'sname' optionally specifies a sample (or set of "
        "samples).")
    parser.add_option('--projects',
                      action='store',
                      dest='project_pattern',
                      default=None,
                      help=projects_help)
    # Subset size for fastq_screen
    subset_help = (
        "specify size of subset of total reads to use for "
        "fastq_screen (i.e. --subset option); (default %d, set to "
        "0 to use all reads)" % fastq_screen_subset)
    parser.add_option('--fastq_screen_subset',
                      action='store',
                      dest='subset',
                      type='int',
                      default=fastq_screen_subset,
                      help=subset_help)
    # Decompression of Fastqs
    parser.add_option('--ungzip-fastqs',
                      action='store_true',
                      dest='ungzip_fastqs',
                      help="create decompressed copies of fastq.gz files")
    # Job control
    max_jobs_help = (
        "explicitly specify maximum number of concurrent QC jobs to run "
        "(default %s, change in settings file)" % max_concurrent_jobs)
    parser.add_option('--max-jobs',
                      action='store',
                      dest='max_jobs',
                      default=max_concurrent_jobs,
                      type='int',
                      help=max_jobs_help)
    add_runner_option(parser)
    add_modulefiles_option(parser)
    add_debug_option(parser)
    # Options which are still accepted but no longer do anything
    defunct = optparse.OptionGroup(parser,'Deprecated/defunct options')
    defunct.add_option('--no-ungzip-fastqs',
                       action='store_true',
                       dest='no_ungzip_fastqs',
                       help="don't create uncompressed copies of fastq.gz files "
                       "(does nothing; this is now the default, use --ungzip-fastqs "
                       "to turn on decompression)")
    parser.add_option_group(defunct)
def _lane_splitting_default_notes():
    """Build the notes on defaults appended to the lane splitting help

    Reads the global 'no_lane_splitting' setting for bcl2fastq and
    any per-platform settings which have been explicitly set (i.e.
    are not None), and describes which platforms use which behaviour
    by default.

    Returns:
      Tuple (no_lane_splitting_note, use_lane_splitting_note); each
      element is either an empty string, or a sentence prefixed with
      '. ' so it can be appended directly to the option help text.
    """
    # Sort platforms with an explicit setting by the value of that setting
    no_lane_splitting_platforms = []
    use_lane_splitting_platforms = []
    for platform in __settings.platform:
        platform_setting = __settings.platform[platform].no_lane_splitting
        if platform_setting is None:
            continue
        if platform_setting:
            no_lane_splitting_platforms.append(platform)
        else:
            use_lane_splitting_platforms.append(platform)
    # Only platforms which disagree with the global default are reported
    no_splitting_is_default = bool(__settings.bcl2fastq.no_lane_splitting)
    if no_splitting_is_default:
        exceptions = use_lane_splitting_platforms
    else:
        exceptions = no_lane_splitting_platforms
    if exceptions:
        names = ', '.join(exceptions)
        default_note = "Used by default for all platforms except %s" % names
        exception_note = "Used by default for %s" % names
    else:
        default_note = "Default for all platforms"
        exception_note = ""
    if no_splitting_is_default:
        no_note,use_note = default_note,exception_note
    else:
        no_note,use_note = exception_note,default_note
    if use_note:
        use_note = ". "+use_note
    if no_note:
        no_note = ". "+no_note
    return (no_note,use_note)

def _nprocessors_default_display():
    """Describe the default number of processors for Fastq generation

    Returns:
      String listing 'PLATFORM: N' for each platform with an explicit
      (not None) 'nprocessors' setting followed by the bcl2fastq
      setting for 'other platforms', or just the bcl2fastq setting if
      no platform has an explicit value.
    """
    display = ["%s: %s" % (platform,
                           __settings.platform[platform].nprocessors)
               for platform in __settings.platform
               if __settings.platform[platform].nprocessors is not None]
    if display:
        display.append("other platforms: %s" %
                       __settings.bcl2fastq.nprocessors)
    else:
        display.append("%s" % __settings.bcl2fastq.nprocessors)
    return ', '.join(display)

def add_make_fastqs_command(cmdparser):
    """Create a parser for the 'make_fastqs' command

    Registers the 'make_fastqs' subcommand on 'cmdparser' and
    attaches its options, organised as:

    - general options (no-save, modulefiles, debug)
    - 'Primary data management' group
    - 'Bcl-to-fastq options' group (including lane splitting,
      adapter trimming/masking, empty Fastq creation, number of
      processors and job runner)
    - 'Statistics generation' group
    - 'Deprecated/defunct options' group

    Arguments:
      cmdparser: command parser object; must provide an
        'add_command' method returning a parser which supports
        'add_option_group' (presumably an optparse.OptionParser, as
        it is used as the parent of optparse.OptionGroup instances).
    """
    p = cmdparser.add_command('make_fastqs',help="Run Fastq generation",
                              usage="%prog make_fastqs [OPTIONS] [ANALYSIS_DIR]",
                              description="Generate fastq files from raw bcl files "
                              "produced by Illumina sequencer within ANALYSIS_DIR.")
    # General options
    add_no_save_option(p)
    add_modulefiles_option(p)
    add_debug_option(p)
    # Primary data management
    primary_data = optparse.OptionGroup(p,'Primary data management')
    primary_data.add_option('--only-fetch-primary-data',action='store_true',
                            dest='only_fetch_primary_data',default=False,
                            help="only fetch the primary data, don't perform any other "
                            "operations")
    primary_data.add_option('--skip-rsync',action='store_true',
                            dest='skip_rsync',default=False,
                            help="don't rsync the primary data at the beginning of processing")
    primary_data.add_option('--remove-primary-data',action='store_true',
                            dest='remove_primary_data',default=False,
                            help="Delete the primary data at the end of processing (default "
                            "is to keep data)")
    p.add_option_group(primary_data)
    # Options to control bcl2fastq
    bcl_to_fastq = optparse.OptionGroup(p,'Bcl-to-fastq options')
    bcl_to_fastq.add_option('--skip-bcl2fastq',action='store_true',
                            dest='skip_bcl2fastq',default=False,
                            help="don't run the Fastq generation step")
    # NB '--output-dir' is stored under 'unaligned_dir'
    bcl_to_fastq.add_option('--output-dir',action='store',
                            dest='unaligned_dir',default=None,
                            help="explicitly set the output (sub)directory for bcl-to-fastq "
                            "conversion (overrides default)")
    bcl_to_fastq.add_option('--use-bases-mask',action="store",
                            dest="bases_mask",default=None,
                            help="explicitly set the bases-mask string to indicate how each "
                            "cycle should be used in the bcl-to-fastq conversion (overrides "
                            "default)")
    bcl_to_fastq.add_option('--sample-sheet',action="store",
                            dest="sample_sheet",default=None,
                            help="use an alternative sample sheet to the default "
                            "'custom_SampleSheet.csv' created on setup.")
    bcl_to_fastq.add_option('--ignore-missing-bcl',action='store_true',
                            dest='ignore_missing_bcl',default=False,
                            help="use the --ignore-missing-bcl option for bcl2fastq (treat "
                            "missing bcl files as no call)")
    bcl_to_fastq.add_option('--ignore-missing-stats',action='store_true',
                            dest='ignore_missing_stats',default=False,
                            help="use the --ignore-missing-stats option for bcl2fastq (fill "
                            "in with zeroes when *.stats files are missing)")
    bcl_to_fastq.add_option('--require-bcl2fastq-version',action='store',
                            dest='bcl2fastq_version',default=None,
                            help="explicitly specify version of bcl2fastq "
                            "software to use (e.g. '1.8.4' or '>=2.0').")
    # Use lane splitting
    # Determine defaults to report to user
    default_no_lane_splitting,default_use_lane_splitting = \
        _lane_splitting_default_notes()
    bcl_to_fastq.add_option('--no-lane-splitting',action='store_true',
                            dest='no_lane_splitting',default=False,
                            help="don't split the output FASTQ files by lane "
                            "(bcl2fastq v2 only; turn off using "
                            "--use-lane-splitting)%s" % default_no_lane_splitting)
    bcl_to_fastq.add_option('--use-lane-splitting',action='store_true',
                            dest='use_lane_splitting',default=False,
                            help="split the output FASTQ files by lane "
                            "(bcl2fastq v2 only; turn off using "
                            "--no-lane-splitting)%s" % default_use_lane_splitting)
    # Adapter trimming/masking options
    # Explicit integer type so that user-supplied values have the
    # same type as the defaults and non-numeric input is rejected
    # when the command line is parsed
    bcl_to_fastq.add_option('--minimum-trimmed-read-length',action="store",
                            dest="minimum_trimmed_read_length",default=35,
                            type='int',
                            help="Minimum read length after adapter "
                            "trimming. bcl2fastq trims the adapter from "
                            "the read down to this value; if there is more "
                            "adapter match below this length then those "
                            "bases are masked not trimmed (i.e. replaced "
                            "by N rather than removed) (default: 35)")
    bcl_to_fastq.add_option('--mask-short-adapter-reads',action="store",
                            dest="mask_short_adapter_reads",default=22,
                            type='int',
                            help="minimum length of unmasked bases that "
                            "a read can be after adapter trimming; reads "
                            "with fewer ACGT bases will be completely "
                            "masked with Ns (default: 22)")
    # Creation of empty fastqs
    bcl_to_fastq.add_option('--create-empty-fastqs',action='store_true',
                            dest='create_empty_fastqs',default=False,
                            help="create 'empty' FASTQ files which weren't "
                            "generated by bcl2fastq because they didn't "
                            "have any reads assigned at the demultiplexing "
                            "stage (NB bcl2fastq must have finished without "
                            "an error for this option to be applied)")
    # Number of processors
    # No single default value is passed (None); only a description
    # of the per-platform defaults is supplied for display
    add_nprocessors_option(bcl_to_fastq,None,
                           default_display=_nprocessors_default_display())
    add_runner_option(bcl_to_fastq)
    p.add_option_group(bcl_to_fastq)
    # Statistics
    statistics = optparse.OptionGroup(p,'Statistics generation')
    statistics.add_option('--stats-file',action='store',
                          dest='stats_file',default=None,
                          help="specify output file for fastq statistics")
    statistics.add_option('--per-lane-stats-file',action='store',
                          dest='per_lane_stats_file',default=None,
                          help="specify output file for per-lane statistics")
    statistics.add_option('--no-stats',action='store_true',
                          dest='no_stats',default=False,
                          help="don't generate statistics file; use 'update_fastq_stats' "
                          "command to (re)generate statistics")
    p.add_option_group(statistics)
    # Deprecated options: still parsed so that existing command lines
    # keep working
    deprecated = optparse.OptionGroup(p,'Deprecated/defunct options')
    deprecated.add_option('--keep-primary-data',action='store_true',
                          dest='keep_primary_data',default=False,
                          help="don't delete the primary data at the end of processing "
                          "(does nothing; primary data is kept by default unless "
                          "--remove-primary-data is specified)")
    deprecated.add_option('--generate-stats',action='store_true',
                          dest='generate_stats',default=False,
                          help="(re)generate statistics for fastq files (does nothing; "
                          "statistics are generated by default unless suppressed by "
                          "--no-stats)")
    deprecated.add_option('--report-barcodes',action='store_true',
                          dest='report_barcodes',default=False,
                          help="analyse and report barcode indices for all lanes after "
                          "generating fastq files (deprecated: use the "
                          "'analyse_barcodes' command instead)")
    deprecated.add_option('--barcodes-file',action='store',
                          dest='barcodes_file',default=None,
                          help="specify output file for barcode analysis report "
                          "(deprecated: use the 'analyse_barcodes' command instead)")
    p.add_option_group(deprecated)