Exemple #1
0
def _add_common_chunk_options(p):
    """Attach the option set shared by the chunking tools to parser ``p``.

    The return values of the debug and max-nchunks helpers are discarded.
    The output-dir and JSON-report helpers are chained, and whatever the
    last helper returns is handed back to the caller.
    """
    # Order matters!
    add_debug_option(p)
    add_max_nchunks_option(p)
    return add_output_chunk_json_report_option(
        _add_chunk_output_dir_option(p))
Exemple #2
0
def add_options_to_parser(p):
    """
    API entry point for extending the main pbreport argument parser
    (independently of the tool contract interface).

    ``p`` is installed as the underlying parser of a freshly built core
    parser wrapper, receives this module's docstring as its description,
    gets the debug option plus the module's own options, and is returned.
    """
    core = _get_parser_core()
    # Point the wrapper at the caller's parser so that options registered
    # through the wrapper end up on ``p``.
    core.arg_parser.parser = p
    p.description = __doc__
    add_debug_option(p)
    _add_options_to_parser(core)
    return p
Exemple #3
0
def _get_more_options(parser):
    """
    Advanced options that won't be exposed via tool contract interface.

    Registers output-file, calculation, parameter, computation-management
    and debugging options on ``parser`` (any object exposing
    ``add_argument``), calls ``add_debug_option`` on it, and returns the
    same ``parser`` object.
    """

    def slurpWindowFile(fname):
        # Read one window designation per line and join them into the
        # comma-delimited form used by -w.  The context manager closes the
        # file handle deterministically.
        with open(fname) as handle:
            return ",".join(map(str.strip, handle.readlines()))

    def parseBool(value):
        # argparse's ``type=bool`` evaluates the truthiness of the raw
        # string, so "False" or "0" on the command line would parse as True.
        # Any value not in this list still parses as True, as before.
        return value.strip().lower() not in (
            "", "0", "false", "f", "no", "n", "off")

    # Output options:

    parser.add_argument('--outfile',
                        dest='outfile',
                        default=None,
                        help='Use this option to generate all possible output files. Argument here is the root filename of the output files.')

    # FIXME: Need to add an extra check for this; it can only be used if --useLDA flag is set.
    parser.add_argument('--m5Cgff',
                        dest='m5Cgff',
                        default=None,
                        help='Name of output GFF file containing m5C scores')

    # FIXME: Make sure that this is specified if --useLDA flag is set.
    # '--m5Cclassifer' is the historical (misspelled) flag; it is kept so
    # existing command lines continue to work alongside the correct spelling.
    parser.add_argument('--m5Cclassifier', '--m5Cclassifer',
                        dest='m5Cclassifier',
                        default=None,
                        help='Specify csv file containing a 127 x 2 matrix')

    parser.add_argument('--csv_h5',
                        dest='csv_h5',
                        default=None,
                        help='Name of csv output to be written in hdf5 format.')

    parser.add_argument('--pickle',
                        dest='pickle',
                        default=None,
                        help='Name of output pickle file.')

    parser.add_argument('--summary_h5',
                        dest='summary_h5',
                        default=None,
                        help='Name of output summary h5 file.')

    parser.add_argument('--ms_csv',
                        dest='ms_csv',
                        default=None,
                        help='Multisite detection CSV file.')

    # Calculation options:

    parser.add_argument('--control',
                        dest='control',
                        default=None,
                        type=validateNoneOrFile,
                        help='cmph.h5 file containing a control sample. Tool will perform a case-control analysis')

    # Temporary addition to test LDA for Ca5C detection:
    parser.add_argument('--useLDA',
                        action="store_true",
                        dest='useLDA',
                        default=False,
                        help='Set this flag to debug LDA for m5C/Ca5C detection')

    # Parameter options:

    parser.add_argument('--paramsPath',
                        dest='paramsPath',
                        default=_getResourcePath(),
                        type=validateNoneOrDir,
                        help='Directory containing in-silico trained model for each chemistry')

    parser.add_argument('--minCoverage',
                        dest='minCoverage',
                        default=3,
                        type=int,
                        help='Minimum coverage required to call a modified base')

    parser.add_argument('--maxQueueSize',
                        dest='maxQueueSize',
                        default=20,
                        type=int,
                        help='Max Queue Size')

    parser.add_argument('--maxCoverage',
                        dest='maxCoverage',
                        type=int, default=-1,
                        help='Maximum coverage to use at each site')

    parser.add_argument('--mapQvThreshold',
                        dest='mapQvThreshold',
                        type=float,
                        default=-1.0)

    parser.add_argument('--ipdModel',
                        dest='ipdModel',
                        default=None,
                        help='Alternate synthetic IPD model HDF5 file')

    parser.add_argument('--modelIters',
                        dest='modelIters',
                        type=int,
                        default=-1,
                        help='[Internal] Number of GBM model iteration to use')

    parser.add_argument('--cap_percentile',
                        dest='cap_percentile',
                        type=float,
                        default=99.0,
                        help='Global IPD percentile to cap IPDs at')

    parser.add_argument("--methylMinCov",
                        type=int,
                        dest='methylMinCov',
                        default=10,
                        help="Do not try to estimate methylFraction unless coverage is at least this.")

    parser.add_argument("--identifyMinCov",
                        type=int,
                        dest='identifyMinCov',
                        default=5,
                        help="Do not try to identify the modification type unless coverage is at least this.")

    parser.add_argument("--maxAlignments",
                        type=int,
                        dest="maxAlignments",
                        default=1500,
                        help="Maximum number of alignments to use for a given window")

    # Computation management options:

    parser.add_argument("-w", "--referenceWindow", "--referenceWindows",
                        "--refContigs",  # backwards compatibility
                        type=str,
                        dest='referenceWindowsAsString',
                        default=None,
                        help="The window (or multiple comma-delimited windows) of the reference to " + \
                             "be processed, in the format refGroup[:refStart-refEnd] "               + \
                             "(default: entire reference).")

    parser.add_argument("--refContigIndex", type=int, dest='refContigIndex', default=-1,
                        help="For debugging purposes only - rather than enter a reference contig name, simply enter an index")

    # -W writes to the same destination as -w: the file contents are
    # converted to the comma-delimited window string by slurpWindowFile.
    parser.add_argument("-W", "--referenceWindowsFile",
                        "--refContigsFile",  # backwards compatibility
                        type=slurpWindowFile,
                        dest='referenceWindowsAsString',
                        default=None,
                        help="A file containing reference window designations, one per line")

    parser.add_argument("--skipUnrecognizedContigs",
                        type=parseBool,
                        default=False,
                        help="Whether to skip, or abort, unrecognized contigs in the -w/-W flags")
    # FIXME shouldn't it always do this?
    parser.add_argument("--alignmentSetRefWindows",
                        action="store_true",
                        dest="referenceWindowsFromAlignment",
                        help="Use refWindows in dataset")

    # Debugging help options:

    parser.add_argument("--threaded", "-T",
                        action="store_true",
                        dest="threaded",
                        default=False,
                        help="Run threads instead of processes (for debugging purposes only)")

    parser.add_argument("--profile",
                        action="store_true",
                        dest="doProfiling",
                        default=False,
                        help="Enable Python-level profiling (using cProfile).")

    add_debug_option(parser)

    parser.add_argument("--seed",
                        action="store",
                        dest="randomSeed",
                        type=int,
                        default=None,
                        help="Random seed (for development and debugging purposes only)")

    parser.add_argument("--referenceStride", action="store", type=int,
                        default=1000,
                        help="Size of reference window in internal "+
                             "parallelization.  For testing purposes only.")

    return parser
def add_options_to_argument_parser(parser):
    """
    Register the consensus/variant-calling command line options on ``parser``.

    Options are organised into argument groups (basic required options,
    parallelism, output filtering, read selection/filtering, algorithm
    settings, debugging/profiling and advanced configuration).  Defaults
    for many options are read from ``Constants``.

    Returns the same ``parser`` object that was passed in.
    """

    def canonicalizedFilePath(path):
        # Expand "~" and make the path absolute so later stages do not
        # depend on the current working directory.
        return os.path.abspath(os.path.expanduser(path))

    def slurpWindowFile(fname):
        # Read one window designation per line and join them into the
        # comma-delimited form used by --referenceWindow.  The context
        # manager closes the file handle deterministically.
        with open(fname) as handle:
            return ",".join(map(str.strip, handle.readlines()))

    def parseReadStratum(s):
        # Build a real list: under Python 3 ``map`` returns an iterator,
        # which supports neither len() nor indexing.
        rs = [int(field) for field in s.split("/")]
        # Raise ValueError instead of asserting: argparse turns ValueError
        # from a type callable into a regular usage error, whereas asserts
        # are stripped under ``python -O`` and otherwise surface as a
        # traceback.
        if len(rs) != 2 or not rs[0] < rs[1]:
            raise ValueError(
                "readStratum must be of the form 'n/N' with n < N")
        return rs

    basics = parser.add_argument_group("Basic required options")
    basics.add_argument(
        "inputFilename",
        type=canonicalizedFilePath,
        help="The input cmp.h5 or BAM alignment file")
    basics.add_argument(
        "--referenceFilename", "--reference", "-r",
        action="store",
        dest="referenceFilename",
        type=canonicalizedFilePath,
        required=True,
        help="The filename of the reference FASTA file")
    basics.add_argument(
        "-o", "--outputFilename",
        dest="outputFilenames",
        required=True,
        type=str,
        action="append",
        default=[],
        help="The output filename(s), as a comma-separated list. " + \
             "Valid output formats are .fa/.fasta, .fq/.fastq, .gff, .vcf")

    parallelism = parser.add_argument_group("Parallelism")
    parallelism.add_argument(
        "-j", "--numWorkers",
        dest="numWorkers",
        type=int,
        default=1,
        help="The number of worker processes to be used")

    filtering = parser.add_argument_group("Output filtering")
    filtering.add_argument(
        "--minConfidence", "-q",
        action="store",
        dest="minConfidence",
        type=int,
        default=Constants.DEFAULT_MIN_CONFIDENCE,
        help="The minimum confidence for a variant call to be output to variants.{gff,vcf}")
    filtering.add_argument(
        "--minCoverage", "-x",
        action="store",
        dest="minCoverage",
        default=Constants.DEFAULT_MIN_COVERAGE,
        type=int,
        help="The minimum site coverage that must be achieved for variant calls and " + \
             "consensus to be calculated for a site.")
    filtering.add_argument(
        "--noEvidenceConsensusCall",
        action="store",
        choices=["nocall", "reference", "lowercasereference"],
        default="lowercasereference",
        help="The consensus base that will be output for sites with no effective coverage.")

    readSelection = parser.add_argument_group("Read selection/filtering")
    readSelection.add_argument(
        "--coverage", "-X",
        action="store",
        dest="coverage",
        type=int,
        default=Constants.DEFAULT_MAX_COVERAGE,
        help="A designation of the maximum coverage level to be used for analysis." + \
             " Exact interpretation is algorithm-specific.")
    readSelection.add_argument(
        "--minMapQV", "-m",
        action="store",
        dest="minMapQV",
        type=float,
        default=Constants.DEFAULT_MIN_MAPQV,
        help="The minimum MapQV for reads that will be used for analysis.")
    # Since the reference isn't loaded at options processing time, we
    # can't grok the referenceWindow specified until later.  We store
    # it as a string (referenceWindowsAsString) and it will later be
    # interpreted and stored as a proper window tuple (referenceWindow)
    readSelection.add_argument(
        "--referenceWindow", "--referenceWindows", "-w",
        action="store",
        dest="referenceWindowsAsString",
        type=str,
        help="The window (or multiple comma-delimited windows) of the reference to " + \
             "be processed, in the format refGroup:refStart-refEnd "                 + \
             "(default: entire reference).",
        default=None)

    readSelection.add_argument(
        "--alignmentSetRefWindows",
        action="store_true",
        dest="referenceWindowsFromAlignment",
        help="The window (or multiple comma-delimited windows) of the reference to " + \
             "be processed, in the format refGroup:refStart-refEnd "                 + \
             "will be pulled from the alignment file.",
        default=False)

    # Shares its destination with --referenceWindow: the file contents are
    # converted to the comma-delimited window string by slurpWindowFile.
    readSelection.add_argument(
        "--referenceWindowsFile", "-W",
        action="store",
        dest="referenceWindowsAsString",
        type=slurpWindowFile,
        help="A file containing reference window designations, one per line",
        default=None)
    readSelection.add_argument(
        "--barcode",
        type=str,
        dest="_barcode",
        help="Only process reads with the given barcode name.")
    readSelection.add_argument(
        "--readStratum",
        help="A string of the form 'n/N', where n, and N are integers, 0 <= n < N, designating" \
             " that the reads are to be deterministically split into N strata of roughly even"  \
             " size, and stratum n is to be used for variant and consensus calling.  This is"   \
             " mostly useful for Quiver development.",
        dest="readStratum",
        default=None,
        type=parseReadStratum)
    readSelection.add_argument(
        "--minReadScore",
        action="store",
        dest="minReadScore",
        type=float,
        default=Constants.DEFAULT_MIN_READSCORE,
        help="The minimum ReadScore for reads that will be used for analysis (arrow-only).")
    readSelection.add_argument(
        "--minSnr",
        action="store",
        dest="minHqRegionSnr",
        type=float,
        default=Constants.DEFAULT_MIN_HQREGIONSNR,
        help="The minimum acceptable signal-to-noise over all channels for reads that will be used for analysis (arrow-only).")
    readSelection.add_argument(
        "--minZScore",
        action="store",
        dest="minZScore",
        type=float,
        default=Constants.DEFAULT_MIN_ZSCORE,
        help="The minimum acceptable z-score for reads that will be used for analysis (arrow-only).")
    readSelection.add_argument(
        "--minAccuracy",
        action="store",
        dest="minAccuracy",
        type=float,
        default=Constants.DEFAULT_MIN_ACCURACY,
        help="The minimum acceptable window-global alignment accuracy for reads that will be used for the analysis (arrow-only).")

    algorithm = parser.add_argument_group("Algorithm and parameter settings")
    algorithm.add_argument(
        "--algorithm",
        action="store",
        dest="algorithm",
        type=str,
        choices=Constants.ALGORITHM_CHOICES,
        default=Constants.DEFAULT_ALGORITHM)
    algorithm.add_argument(
        "--parametersFile", "-P",
        dest="parametersFile",
        type=str,
        default=None,
        help="Parameter set filename (such as ArrowParameters.json or "       + \
             "QuiverParameters.ini), or directory D such that either "        + \
             "D/*/GenomicConsensus/QuiverParameters.ini, "                    + \
             "or D/GenomicConsensus/QuiverParameters.ini, is found.  In the " + \
             "former case, the lexically largest path is chosen.")
    algorithm.add_argument(
        "--parametersSpec", "-p",
        action="store",
        dest="parametersSpec",
        type=str,
        default="auto",
        help="Name of parameter set (chemistry.model) to select from the "   + \
             "parameters file, or just the name of the chemistry, in which " + \
             "case the best available model is chosen.  Default is 'auto', " + \
             "which selects the best parameter set from the alignment data")
    algorithm.add_argument(
        "--maskRadius",
        dest="maskRadius",
        type=int,
        default=Constants.DEFAULT_MASK_RADIUS,
        help="Radius of window to use when excluding local regions for " + \
             "exceeding maskMinErrorRate, where 0 disables any filtering (arrow-only).")
    algorithm.add_argument(
        "--maskErrorRate",
        dest="maskErrorRate",
        type=float,
        default=Constants.DEFAULT_MASK_ERROR_RATE,
        help="Maximum local error rate before the local region defined by " + \
             "maskRadius is excluded from polishing (arrow-only).")

    debugging = parser.add_argument_group("Verbosity and debugging/profiling")
    add_debug_option(debugging)
    debugging.add_argument(
        "--notrace",
        action="store_true",
        dest="notrace",
        default=False,
        help="Suppress stacktrace for exceptions (to simplify testing)")
    debugging.add_argument(
        "--pdbAtStartup",
        action="store_true",
        dest="pdbAtStartup",
        default=False,
        help="Drop into Python debugger at startup (requires ipdb)")
    debugging.add_argument(
        "--profile",
        action="store_true",
        dest="doProfiling",
        default=False,
        help="Enable Python-level profiling (using cProfile).")
    debugging.add_argument(
        "--annotateGFF",
        action="store_true",
        help="Augment GFF variant records with additional information")
    debugging.add_argument(
        "--reportEffectiveCoverage",
        action="store_true",
        help="Additionally record the *post-filtering* coverage at variant sites")

    advanced = parser.add_argument_group("Advanced configuration options")
    advanced.add_argument(
        "--diploid",
        action="store_true",
        help="Enable detection of heterozygous variants (experimental)")
    advanced.add_argument(
        "--queueSize", "-Q",
        action="store",
        dest="queueSize",
        type=int,
        default=200)
    advanced.add_argument(
        "--threaded", "-T",
        action="store_true",
        dest="threaded",
        default=False,
        help="Run threads instead of processes (for debugging purposes only)")
    advanced.add_argument(
        "--referenceChunkSize", "-C",
        action="store",
        dest="referenceChunkSize",
        type=int,
        default=500)
    # --fancyChunking defaults to True, so passing it is a no-op;
    # --simpleChunking is the switch that turns the behaviour off.
    advanced.add_argument(
        "--fancyChunking",
        default=True,
        action="store_true",
        help="Adaptive reference chunking designed to handle coverage cutouts better")
    advanced.add_argument(
        "--simpleChunking",
        dest="fancyChunking",
        action="store_false",
        help="Disable adaptive reference chunking")
    advanced.add_argument(
        "--referenceChunkOverlap",
        action="store",
        dest="referenceChunkOverlap",
        type=int,
        default=5)
    advanced.add_argument(
        "--autoDisableHdf5ChunkCache",
        action="store",
        type=int,
        default=500,
        help="Disable the HDF5 chunk cache when the number of datasets in the cmp.h5 " + \
             "exceeds the given threshold")
    advanced.add_argument(
        "--aligner", "-a",
        action="store",
        choices=["affine", "simple"],
        default="affine",
        help="The pairwise alignment algorithm that will be used to produce variant calls" \
             " from the consensus (Quiver only).")
    advanced.add_argument(
        "--refineDinucleotideRepeats",
        dest="refineDinucleotideRepeats",
        action="store_true",
        help="Require quiver maximum likelihood search to try one less/more repeat copy in"  \
             " dinucleotide repeats, which seem to be the most frequent cause of suboptimal" \
             " convergence (getting trapped in local optimum) (Quiver only)")
    advanced.add_argument(
        "--noRefineDinucleotideRepeats",
        dest="refineDinucleotideRepeats",
        action="store_false",
        help="Disable dinucleotide refinement")
    # Both flags above share one destination; make the enabled state the
    # default so only --noRefineDinucleotideRepeats changes behaviour.
    advanced.set_defaults(refineDinucleotideRepeats=True)
    advanced.add_argument(
        "--fast",
        dest="fastMode",
        action="store_true",
        help="Cut some corners to run faster.  Unsupported!")
    advanced.add_argument(
        "--skipUnrecognizedContigs",
        action="store_true",
        help="Do not abort when told to process a reference window (via -w/--referenceWindow[s]) " \
             "that has no aligned coverage.  Outputs emptyish files if there are no remaining "    \
             "non-degenerate windows.  Only intended for use by smrtpipe scatter/gather.")

    return parser
Exemple #5
0
def constructOptionParser(parser, C=Constants, ccs_mode=False):
    """
    Add PBAlignRunner arguments to the parser.

    ``parser`` is a wrapper object exposing ``tool_contract_parser`` and
    ``arg_parser.parser``; command line options are registered on the
    latter and a subset is mirrored on the former.

    ``C`` supplies the tool contract option IDs and the default algorithm
    options string.

    When ``ccs_mode`` is true, the ``noSplitSubreads`` and ``concordant``
    booleans are not registered on the tool contract parser.

    Returns the underlying ``arg_parser.parser`` object, not the wrapper.
    """
    # save reference to PbParser
    p = parser
    tcp = p.tool_contract_parser
    parser = parser.arg_parser.parser
    #parser.argument_default = argparse.SUPPRESS
    parser.formatter_class = argparse.ArgumentDefaultsHelpFormatter
    add_debug_option(parser)

    # Optional input.
    input_group = parser.add_argument_group("Optional input arguments")
    input_group.add_argument("--regionTable",
                        dest="regionTable",
                        type=str,
                        default=None,
                        action="store",
                        help="Specify a region table for filtering reads.")

    input_group.add_argument("--configFile",
                        dest="configFile",
                        default=None,
                        type=str,
                        action="store",
                        help="Specify a set of user-defined argument values.")

    helpstr = "When input reads are in fasta format and output is a cmp.h5\n" + \
              "this option can specify pls.h5 or bas.h5 or \n" + \
              "FOFN files from which pulse metrics can be loaded for Quiver."
    input_group.add_argument("--pulseFile",
                        dest="pulseFile",
                        default=None,
                        type=str,
                        action="store",
                        help=helpstr)

    # Chose an aligner.
    align_group = parser.add_argument_group("Alignment options")
    helpstr = "Select an algorithm from {0}.\n".format(ALGORITHM_CANDIDATES)
    align_group.add_argument("--algorithm",
                        dest="algorithm",
                        type=str,
                        action="store",
                        choices=ALGORITHM_CANDIDATES,
                        default=ALGORITHM_CANDIDATES[0],
                        help=helpstr)

    # Aligner options.
    helpstr = "The maximum number of matches of each read to the \n" + \
              "reference sequence that will be evaluated."
    align_group.add_argument("--maxHits",
                        dest="maxHits",
                        type=int,
                        default=None,  # Set as None instead of a real number.
                        action="store",
                        help=helpstr)

    helpstr = "The minimum anchor size defines the length of the read\n" + \
              "that must match against the reference sequence."
    align_group.add_argument("--minAnchorSize",
                        dest="minAnchorSize",
                        type=int,
                        default=None,  # Set as None to avoid conflicts with
                                       # --algorithmOptions
                        action="store",
                        help=helpstr)

    # Aligner options: Use ccs or not?
    helpstr = "Map the ccsSequence to the genome first, then align\n" + \
              "subreads to the interval that the CCS reads mapped to.\n" + \
              "  useccs: only maps subreads that span the length of\n" + \
              "          the template.\n" + \
              "  useccsall: maps all subreads.\n" + \
              "  useccsdenovo: maps ccs only."
    align_group.add_argument("--useccs",
                        type=str,
                        choices=["useccs", "useccsall", "useccsdenovo"],
                        action="store",
                        default=None,
                        help=helpstr)

    helpstr = "Do not split reads into subreads even if subread \n" + \
              "regions are available."
    align_group.add_argument("--noSplitSubreads",
                        dest="noSplitSubreads",
                        default=DEFAULT_OPTIONS["noSplitSubreads"],
                        action="store_true",
                        help=helpstr)
    if not ccs_mode:
        tcp.add_boolean(C.NO_SPLIT_ID, "noSplitSubreads",
            default=DEFAULT_OPTIONS["noSplitSubreads"],
            name="Align unsplit polymerase reads",
            description=helpstr)

    helpstr = "Map subreads of a ZMW to the same genomic location.\n"
    align_group.add_argument("--concordant",
                        dest="concordant",
                        default=DEFAULT_OPTIONS["concordant"],
                        action="store_true",
                        help=helpstr)
    if not ccs_mode:
        tcp.add_boolean(C.CONCORDANT_ID, "concordant",
            default=DEFAULT_OPTIONS["concordant"],
            name="Concordant alignment",
            description="Map subreads of a ZMW to the same genomic location")

    helpstr = "Number of threads."
    align_group.add_argument("--nproc",
                        type=int,
                        dest="nproc",
                        default=DEFAULT_OPTIONS["nproc"],
                        #default=15,
                        action="store",
                        help=helpstr)

    align_group.add_argument("--algorithmOptions",
                        type=str,
                        dest="algorithmOptions",
                        default=None,
                        action="append",
                        help="Pass alignment options through.")
    # XXX the arguments used in SMRTpipe 2.3 are different from the defaults
    # for the command line tool
    tcp.add_str(C.ALGORITHM_OPTIONS_ID, "algorithmOptions",
        default=C.ALGORITHM_OPTIONS_DEFAULT,
        name="Algorithm options",
        description="List of space-separated arguments passed to BLASR")

    # Filtering criteria and hit policy.
    filter_group = parser.add_argument_group("Filter criteria options")
    helpstr = "The maximum allowed percentage divergence of a read \n" + \
              "from the reference sequence."
    filter_group.add_argument("--maxDivergence",
                        dest="maxDivergence",
                        type=float,
                        default=DEFAULT_OPTIONS["maxDivergence"],
                        #default=30,
                        action="store",
                        help=helpstr)

    helpstr = "The minimum concordance of alignments that\n" + \
              "will be evaluated."
    filter_group.add_argument("--minAccuracy",
                        dest="minAccuracy",
                        type=float,
                        default=DEFAULT_OPTIONS["minAccuracy"],
                        #default=70,
                        action="store",
                        help=helpstr)
    tcp.add_float(C.MIN_ACCURACY_ID, "minAccuracy",
        default=DEFAULT_OPTIONS["minAccuracy"],
        name="Min. concordance",
        description="Minimum required alignment concordance")

    helpstr = "The minimum aligned read length of alignments that\n" + \
              "will be evaluated."
    filter_group.add_argument("--minLength",
                        dest="minLength",
                        type=int,
                        default=DEFAULT_OPTIONS["minLength"],
                        action="store",
                        help=helpstr)
    tcp.add_int(C.MIN_LENGTH_ID, "minLength",
        default=DEFAULT_OPTIONS["minLength"],
        name="Min. length",
        description="Minimum required alignment length")

    # NOTE: --scoreFunction and --scoreMatrix are not registered here; their
    # definitions were previously kept in this spot as commented-out code.

    filter_group.add_argument("--scoreCutoff",
                        dest="scoreCutoff",
                        type=int,
                        default=None,
                        action="store",
                        help="The worst score to output an alignment.\n")

    helpstr = "Specify a policy for how to treat multiple hit\n" + \
           "  random    : selects a random hit.\n" + \
           "  all       : selects all hits.\n" + \
           "  allbest   : selects all the best score hits.\n" + \
           "  randombest: selects a random hit from all best score hits.\n" + \
           "  leftmost  : selects a hit which has the best score and the\n" + \
           "              smallest mapping coordinate in any reference.\n"
    filter_group.add_argument("--hitPolicy",
                        dest="hitPolicy",
                        type=str,
                        choices=HITPOLICY_CANDIDATES,
                        default=DEFAULT_OPTIONS["hitPolicy"],
                        action="store",
                        help=helpstr)
    tcp.add_str(C.HIT_POLICY_ID, "hitPolicy",
        default=DEFAULT_OPTIONS["hitPolicy"],
        name="Hit policy",
        description=helpstr)

    helpstr = "If specified, do not report adapter-only hits using\n" + \
              "annotations with the reference entry."
    filter_group.add_argument("--filterAdapterOnly",
                        dest="filterAdapterOnly",
                        default=DEFAULT_OPTIONS["filterAdapterOnly"],
                        action="store_true",
                        help=helpstr)

    # Output.
    # CMP H5 output has been deprecated, let's hide associated options.
    # Each option below passes help=argparse.SUPPRESS, so its description
    # is kept as a comment rather than as an unused help string.
    cmph5_group = parser.add_argument_group("Options for cmp.h5")

    # --readType: specify the ReadType attribute in the cmp.h5 output.
    cmph5_group.add_argument("--readType",
                        dest="readType",
                        type=str,
                        action="store",
                        default=DEFAULT_OPTIONS["readType"],
                        help=argparse.SUPPRESS)

    # --forQuiver: the output cmp.h5 file will be sorted, loaded with pulse
    # QV information, and repacked, so that it can be consumed by quiver
    # directly.  This requires the input file to be in PacBio bas/pls.h5
    # format, and --useccs must be None.
    cmph5_group.add_argument("--forQuiver",
                        dest="forQuiver",
                        action="store_true",
                        default=DEFAULT_OPTIONS["forQuiver"],
                        help=argparse.SUPPRESS)

    # --loadQVs: similar to --forQuiver, the only difference is that
    # --useccs can be specified.
    cmph5_group.add_argument("--loadQVs",
                        dest="loadQVs",
                        action="store_true",
                        default=DEFAULT_OPTIONS["loadQVs"],
                        help=argparse.SUPPRESS)

    # --byread: load pulse information using -byread option instead of
    # -bymetric.  Only works when --forQuiver or --loadQVs are set.
    cmph5_group.add_argument("--byread",
                        dest="byread",
                        action="store_true",
                        default=DEFAULT_OPTIONS["byread"],
                        help=argparse.SUPPRESS)

    # --metrics: load the specified (comma-delimited list of) metrics
    # instead of the default metrics required by quiver.  This option only
    # works when --forQuiver or --loadQVs are set.
    cmph5_group.add_argument("--metrics",
                        dest="metrics",
                        type=str,
                        action="store",
                        default=DEFAULT_OPTIONS["metrics"],
                        help=argparse.SUPPRESS)

    # Miscellaneous.
    misc_group = parser.add_argument_group("Miscellaneous options")
    helpstr = "Output names of unaligned reads to specified file."
    misc_group.add_argument("--unaligned",
                            dest="unaligned",
                            type=str,
                            action="store",
                            default=DEFAULT_OPTIONS["unaligned"],
                            help=helpstr)

    helpstr = "Initialize the random number generator with a non-zero \n" + \
              "integer. Zero means that current system time is used.\n"
    misc_group.add_argument("--seed",
                        dest="seed",
                        type=int,
                        default=DEFAULT_OPTIONS["seed"],
                        action="store",
                        help=helpstr)

    helpstr = "Specify a directory for saving temporary files.\n"
    misc_group.add_argument("--tmpDir",
                        dest="tmpDir",
                        type=str,
                        action="store",
                        default=DEFAULT_OPTIONS["tmpDir"],
                        help=helpstr)

    # Keep all temporary & intermediate files.
    misc_group.add_argument("--keepTmpFiles",
                        dest="keepTmpFiles",
                        action="store_true",
                        default=False,
                        help=argparse.SUPPRESS)
    return parser
Exemple #6
0
 def _f(p):
     """Add the debug option to ``p``, then apply the gather options to it.

     The gather-option function is built from the enclosing scope's message,
     validation and chunk-key arguments; its result for ``p`` is returned.
     """
     add_debug_option(p)
     return __gather_options(
         output_file_msg, input_file_msg, validate_file, chunk_key_func)(p)
Exemple #7
0
def _get_more_options(parser):
    """
    Advanced options that won't be exposed via tool contract interface.

    Registers the output-file, calculation, parameter, computation-management
    and debugging options on ``parser`` (the debug flag itself is added by
    ``add_debug_option``) and returns the same ``parser`` object.
    """

    def slurpWindowFile(fname):
        # Collapse the window file into a single comma-delimited string.
        # The context manager guarantees the handle is closed after reading.
        with open(fname) as handle:
            return ",".join(line.strip() for line in handle)

    def parseBool(value):
        # ``type=bool`` would turn every non-empty string (including
        # "False" and "0") into True, so the text is interpreted explicitly.
        # A ValueError is reported by argparse as a normal usage error.
        text = value.strip().lower()
        if text in ("1", "true", "t", "yes", "y", "on"):
            return True
        if text in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise ValueError("not a boolean value: %r" % (value,))

    # Output options:

    parser.add_argument(
        '--outfile',
        dest='outfile',
        default=None,
        help=
        'Use this option to generate all possible output files. Argument here is the root filename of the output files.'
    )

    # FIXME: Need to add an extra check for this; it can only be used if --useLDA flag is set.
    parser.add_argument('--m5Cgff',
                        dest='m5Cgff',
                        default=None,
                        help='Name of output GFF file containing m5C scores')

    # FIXME: Make sure that this is specified if --useLDA flag is set.
    # NOTE: the flag spelling ('classifer') differs from the dest
    # ('classifier'); it is left as-is so existing command lines keep working.
    parser.add_argument('--m5Cclassifer',
                        dest='m5Cclassifier',
                        default=None,
                        help='Specify csv file containing a 127 x 2 matrix')

    parser.add_argument(
        '--csv_h5',
        dest='csv_h5',
        default=None,
        help='Name of csv output to be written in hdf5 format.')

    parser.add_argument('--pickle',
                        dest='pickle',
                        default=None,
                        help='Name of output pickle file.')

    parser.add_argument('--summary_h5',
                        dest='summary_h5',
                        default=None,
                        help='Name of output summary h5 file.')

    parser.add_argument('--ms_csv',
                        dest='ms_csv',
                        default=None,
                        help='Multisite detection CSV file.')

    # Calculation options:

    parser.add_argument(
        '--control',
        dest='control',
        default=None,
        type=validateNoneOrFile,
        help=
        'cmph.h5 file containing a control sample. Tool will perform a case-control analysis'
    )

    # Temporary addition to test LDA for Ca5C detection:
    parser.add_argument(
        '--useLDA',
        action="store_true",
        dest='useLDA',
        default=False,
        help='Set this flag to debug LDA for m5C/Ca5C detection')

    # Parameter options:
    defaultParamsPathSpec = _getResourcePathSpec()
    parser.add_argument(
        '--paramsPath',
        dest='paramsPath',
        default=defaultParamsPathSpec,
        type=validateNoneOrPathSpec,
        help=
        'List of :-delimited directory paths containing in-silico trained models (default is "%s")'
        % defaultParamsPathSpec)

    parser.add_argument(
        '--minCoverage',
        dest='minCoverage',
        default=3,
        type=int,
        help='Minimum coverage required to call a modified base')

    parser.add_argument('--maxQueueSize',
                        dest='maxQueueSize',
                        default=20,
                        type=int,
                        help='Max Queue Size')

    parser.add_argument('--maxCoverage',
                        dest='maxCoverage',
                        type=int,
                        default=-1,
                        help='Maximum coverage to use at each site')

    parser.add_argument('--mapQvThreshold',
                        dest='mapQvThreshold',
                        type=float,
                        default=-1.0)

    parser.add_argument('--ipdModel',
                        dest='ipdModel',
                        default=None,
                        type=validateNoneOrFile,
                        help='Alternate synthetic IPD model HDF5 file')

    parser.add_argument('--modelIters',
                        dest='modelIters',
                        type=int,
                        default=-1,
                        help='[Internal] Number of GBM model iteration to use')

    parser.add_argument('--cap_percentile',
                        dest='cap_percentile',
                        type=float,
                        default=99.0,
                        help='Global IPD percentile to cap IPDs at')

    parser.add_argument(
        "--methylMinCov",
        type=int,
        dest='methylMinCov',
        default=10,
        help=
        "Do not try to estimate methylFraction unless coverage is at least this."
    )

    parser.add_argument(
        "--identifyMinCov",
        type=int,
        dest='identifyMinCov',
        default=5,
        help=
        "Do not try to identify the modification type unless coverage is at least this."
    )

    parser.add_argument(
        "--maxAlignments",
        type=int,
        dest="maxAlignments",
        default=1500,
        help="Maximum number of alignments to use for a given window")

    # Computation management options:

    parser.add_argument(
        "-w",
        "--referenceWindow",
        "--referenceWindows",
        "--refContigs",  # backwards compatibility
        type=str,
        dest='referenceWindowsAsString',
        default=None,
        help="The window (or multiple comma-delimited windows) of the reference to "
             "be processed, in the format refGroup[:refStart-refEnd] "
             "(default: entire reference).")

    parser.add_argument(
        "--refContigIndex",
        type=int,
        dest='refContigIndex',
        default=-1,
        help=
        "For debugging purposes only - rather than enter a reference contig name, simply enter an index"
    )

    # Shares its dest with -w: the file's contents are converted to the same
    # comma-delimited window string by slurpWindowFile.
    parser.add_argument(
        "-W",
        "--referenceWindowsFile",
        "--refContigsFile",  # backwards compatibility
        type=slurpWindowFile,
        dest='referenceWindowsAsString',
        default=None,
        help="A file containing reference window designations, one per line")

    parser.add_argument(
        "--skipUnrecognizedContigs",
        type=parseBool,
        default=False,
        help=
        "Whether to skip, or abort, unrecognized contigs in the -w/-W flags")
    # FIXME shouldn't it always do this?
    parser.add_argument("--alignmentSetRefWindows",
                        action="store_true",
                        dest="referenceWindowsFromAlignment",
                        help="Use refWindows in dataset")

    # Debugging help options:

    parser.add_argument(
        "--threaded",
        "-T",
        action="store_true",
        dest="threaded",
        default=False,
        help="Run threads instead of processes (for debugging purposes only)")

    parser.add_argument("--profile",
                        action="store_true",
                        dest="doProfiling",
                        default=False,
                        help="Enable Python-level profiling (using cProfile).")

    add_debug_option(parser)

    parser.add_argument(
        "--seed",
        action="store",
        dest="randomSeed",
        type=int,
        default=None,
        help="Random seed (for development and debugging purposes only)")

    parser.add_argument("--referenceStride",
                        action="store",
                        type=int,
                        default=1000,
                        help="Size of reference window in internal " +
                        "parallelization.  For testing purposes only.")

    return parser
Exemple #8
0
def constructOptionParser(parser, C=Constants, ccs_mode=False):
    """
    Add PBAlignRunner arguments to the parser.

    ``parser`` must expose ``tool_contract_parser`` and ``arg_parser.parser``.
    Command-line options are registered on the latter; a subset is mirrored
    on the former using option ids taken from ``C``.  When ``ccs_mode`` is
    true, the ``noSplitSubreads`` and ``concordant`` tool-contract booleans
    are not registered.  Returns the underlying ``arg_parser.parser`` object.
    """
    tcp = parser.tool_contract_parser
    arg_parser = parser.arg_parser.parser
    arg_parser.formatter_class = argparse.ArgumentDefaultsHelpFormatter
    add_debug_option(arg_parser)

    # ------------------------------------------------------------------
    # Optional input.
    # ------------------------------------------------------------------
    optional_inputs = arg_parser.add_argument_group("Optional input arguments")
    optional_inputs.add_argument(
        "--regionTable",
        action="store",
        dest="regionTable",
        type=str,
        default=None,
        help="Specify a region table for filtering reads.")
    optional_inputs.add_argument(
        "--configFile",
        action="store",
        dest="configFile",
        type=str,
        default=None,
        help="Specify a set of user-defined argument values.")
    optional_inputs.add_argument(
        "--pulseFile",
        action="store",
        dest="pulseFile",
        type=str,
        default=None,
        help=("When input reads are in fasta format and output is a cmp.h5\n"
              "this option can specify pls.h5 or bas.h5 or \n"
              "FOFN files from which pulse metrics can be loaded for Quiver."))

    # ------------------------------------------------------------------
    # Aligner selection and aligner options.
    # ------------------------------------------------------------------
    alignment = arg_parser.add_argument_group("Alignment options")
    alignment.add_argument(
        "--algorithm",
        action="store",
        dest="algorithm",
        type=str,
        choices=ALGORITHM_CANDIDATES,
        default=ALGORITHM_CANDIDATES[0],
        help="Select an aligorithm from {0}.\n".format(ALGORITHM_CANDIDATES))

    alignment.add_argument(
        "--maxHits",
        action="store",
        dest="maxHits",
        type=int,
        default=None,  # Deliberately None rather than a real number.
        help=("The maximum number of matches of each read to the \n"
              "reference sequence that will be evaluated."))

    alignment.add_argument(
        "--minAnchorSize",
        action="store",
        dest="minAnchorSize",
        type=int,
        default=None,  # None avoids conflicts with --algorithmOptions.
        help=("The minimum anchor size defines the length of the read\n"
              "that must match against the reference sequence."))

    # Use ccs or not?
    alignment.add_argument(
        "--useccs",
        action="store",
        type=str,
        choices=["useccs", "useccsall", "useccsdenovo"],
        default=None,
        help=("Map the ccsSequence to the genome first, then align\n"
              "subreads to the interval that the CCS reads mapped to.\n"
              "  useccs: only maps subreads that span the length of\n"
              "          the template.\n"
              "  useccsall: maps all subreads.\n"
              "  useccsdenovo: maps ccs only."))

    no_split_default = DEFAULT_OPTIONS["noSplitSubreads"]
    help_text = ("Do not split reads into subreads even if subread \n"
                 "regions are available.")
    alignment.add_argument(
        "--noSplitSubreads",
        action="store_true",
        dest="noSplitSubreads",
        default=no_split_default,
        help=help_text)
    if not ccs_mode:
        tcp.add_boolean(
            C.NO_SPLIT_ID,
            "noSplitSubreads",
            default=no_split_default,
            name="Align unsplit polymerase reads",
            description=help_text)

    concordant_default = DEFAULT_OPTIONS["concordant"]
    alignment.add_argument(
        "--concordant",
        action="store_true",
        dest="concordant",
        default=concordant_default,
        help="Map subreads of a ZMW to the same genomic location.\n")
    if not ccs_mode:
        tcp.add_boolean(
            C.CONCORDANT_ID,
            "concordant",
            default=concordant_default,
            name="Concordant alignment",
            description="Map subreads of a ZMW to the same genomic location")

    alignment.add_argument(
        "--nproc",
        action="store",
        dest="nproc",
        type=int,
        default=DEFAULT_OPTIONS["nproc"],
        help="Number of threads.")

    alignment.add_argument(
        "--algorithmOptions",
        action="append",
        dest="algorithmOptions",
        type=str,
        default=None,
        help="Pass alignment options through.")
    # XXX the arguments used in SMRTpipe 2.3 are different from the defaults
    # for the command line tool
    tcp.add_str(
        C.ALGORITHM_OPTIONS_ID,
        "algorithmOptions",
        default=C.ALGORITHM_OPTIONS_DEFAULT,
        name="Algorithm options",
        description="List of space-separated arguments passed to BLASR")

    # ------------------------------------------------------------------
    # Filtering criteria and hit policy.
    # ------------------------------------------------------------------
    filters = arg_parser.add_argument_group("Filter criteria options")
    filters.add_argument(
        "--maxDivergence",
        action="store",
        dest="maxDivergence",
        type=float,
        default=DEFAULT_OPTIONS["maxDivergence"],
        help=("The maximum allowed percentage divergence of a read \n"
              "from the reference sequence."))

    min_accuracy_default = DEFAULT_OPTIONS["minAccuracy"]
    filters.add_argument(
        "--minAccuracy",
        action="store",
        dest="minAccuracy",
        type=float,
        default=min_accuracy_default,
        help=("The minimum concordance of alignments that\n"
              "will be evaluated."))
    tcp.add_float(
        C.MIN_ACCURACY_ID,
        "minAccuracy",
        default=min_accuracy_default,
        name="Min. concordance",
        description="Minimum required alignment concordance")

    min_length_default = DEFAULT_OPTIONS["minLength"]
    filters.add_argument(
        "--minLength",
        action="store",
        dest="minLength",
        type=int,
        default=min_length_default,
        help=("The minimum aligned read length of alignments that\n"
              "will be evaluated."))
    tcp.add_int(
        C.MIN_LENGTH_ID,
        "minLength",
        default=min_length_default,
        name="Min. length",
        description="Minimum required alignment length")

    # NOTE: the --scoreFunction and --scoreMatrix options are intentionally
    # not registered here (their definitions were commented out).

    filters.add_argument(
        "--scoreCutoff",
        action="store",
        dest="scoreCutoff",
        type=int,
        default=None,
        help="The worst score to output an alignment.\n")

    hit_policy_default = DEFAULT_OPTIONS["hitPolicy"]
    help_text = (
        "Specify a policy for how to treat multiple hit\n"
        "  random    : selects a random hit.\n"
        "  all       : selects all hits.\n"
        "  allbest   : selects all the best score hits.\n"
        "  randombest: selects a random hit from all best score hits.\n"
        "  leftmost  : selects a hit which has the best score and the\n"
        "              smallest mapping coordinate in any reference.\n")
    filters.add_argument(
        "--hitPolicy",
        action="store",
        dest="hitPolicy",
        type=str,
        choices=HITPOLICY_CANDIDATES,
        default=hit_policy_default,
        help=help_text)
    tcp.add_str(
        C.HIT_POLICY_ID,
        "hitPolicy",
        default=hit_policy_default,
        name="Hit policy",
        description=help_text)

    filters.add_argument(
        "--filterAdapterOnly",
        action="store_true",
        dest="filterAdapterOnly",
        default=DEFAULT_OPTIONS["filterAdapterOnly"],
        help=("If specified, do not report adapter-only hits using\n"
              "annotations with the reference entry."))

    # ------------------------------------------------------------------
    # Output.
    # CMP H5 output has been deprecated, so the associated options are
    # hidden (help=argparse.SUPPRESS).  The help_text values below are never
    # displayed; they only describe the hidden options for readers of the
    # code.
    # ------------------------------------------------------------------
    cmph5 = arg_parser.add_argument_group("Options for cmp.h5")
    help_text = "Specify the ReadType attribute in the cmp.h5 output.\n"
    cmph5.add_argument(
        "--readType",
        action="store",
        dest="readType",
        type=str,
        default=DEFAULT_OPTIONS["readType"],
        help=argparse.SUPPRESS)

    help_text = ("The output cmp.h5 file which will be sorted, loaded\n"
                 "with pulse QV information, and repacked, so that it \n"
                 "can be consumed by quiver directly. This requires\n"
                 "the input file to be in PacBio bas/pls.h5 format,\n"
                 "and --useccs must be None.")
    cmph5.add_argument(
        "--forQuiver",
        action="store_true",
        dest="forQuiver",
        default=DEFAULT_OPTIONS["forQuiver"],
        help=argparse.SUPPRESS)

    help_text = ("Similar to --forQuiver, the only difference is that \n"
                 "--useccs can be specified.")
    cmph5.add_argument(
        "--loadQVs",
        action="store_true",
        dest="loadQVs",
        default=DEFAULT_OPTIONS["loadQVs"],
        help=argparse.SUPPRESS)

    help_text = ("Load pulse information using -byread option instead\n"
                 "of -bymetric. Only works when --forQuiver or \n"
                 "--loadQVs are set.")
    cmph5.add_argument(
        "--byread",
        action="store_true",
        dest="byread",
        default=DEFAULT_OPTIONS["byread"],
        help=argparse.SUPPRESS)

    help_text = ("Load the specified (comma-delimited list of) metrics\n"
                 "instead of the default metrics required by quiver.\n"
                 "This option only works when --forQuiver  or \n"
                 "--loadQVs are set.")
    cmph5.add_argument(
        "--metrics",
        action="store",
        dest="metrics",
        type=str,
        default=DEFAULT_OPTIONS["metrics"],
        help=argparse.SUPPRESS)

    # ------------------------------------------------------------------
    # Miscellaneous.
    # ------------------------------------------------------------------
    misc = arg_parser.add_argument_group("Miscellaneous options")
    misc.add_argument(
        "--unaligned",
        action="store",
        dest="unaligned",
        type=str,
        default=DEFAULT_OPTIONS["unaligned"],
        help="Output names of unaligned reads to specified file.")

    misc.add_argument(
        "--seed",
        action="store",
        dest="seed",
        type=int,
        default=DEFAULT_OPTIONS["seed"],
        help=("Initialize the random number generator with a none-zero \n"
              "integer. Zero means that current system time is used.\n"))

    misc.add_argument(
        "--tmpDir",
        action="store",
        dest="tmpDir",
        type=str,
        default=DEFAULT_OPTIONS["tmpDir"],
        help="Specify a directory for saving temporary files.\n")

    # Keep all temporary & intermediate files (hidden option).
    misc.add_argument(
        "--keepTmpFiles",
        action="store_true",
        dest="keepTmpFiles",
        default=False,
        help=argparse.SUPPRESS)

    return arg_parser
Exemple #9
0
 def _f(p):
     """Register the debug option on ``p``, then return the result of
     calling the function built by ``__gather_options`` on ``p``."""
     add_debug_option(p)
     gather_args = (output_file_msg, input_file_msg, validate_file,
                    chunk_key_func)
     return __gather_options(*gather_args)(p)
Exemple #10
0
def add_options_to_argument_parser(parser):
    """
    Register the command-line options on ``parser`` and return it.

    Options are organised into argument groups: basic required options,
    parallelism, output filtering, read selection/filtering, algorithm and
    parameter settings, verbosity/debugging/profiling, and advanced
    configuration.  The debug option itself is added by ``add_debug_option``.

    :param parser: parser object supporting ``add_argument_group``.
    :return: the same ``parser`` object.
    """

    def canonicalizedFilePath(path):
        # Expand a leading "~" and make the path absolute.
        return os.path.abspath(os.path.expanduser(path))

    def slurpWindowFile(fname):
        # Collapse the window file into a single comma-delimited string.
        # The context manager guarantees the handle is closed after reading.
        with open(fname) as handle:
            return ",".join(line.strip() for line in handle)

    def parseReadStratum(s):
        # Materialise the map() result: on Python 3 it is a lazy iterator
        # that supports neither len() nor indexing.
        rs = list(map(int, s.split("/")))
        # Raise ValueError rather than assert: asserts vanish under -O, and
        # argparse turns ValueError from a type function into a usage error.
        if len(rs) != 2 or not rs[0] < rs[1]:
            raise ValueError("expected 'n/N' with n < N, got %r" % (s,))
        return rs

    basics = parser.add_argument_group("Basic required options")
    basics.add_argument(
        "inputFilename",
        type=canonicalizedFilePath,
        help="The input cmp.h5 or BAM alignment file")
    basics.add_argument(
        "--referenceFilename", "--reference", "-r",
        action="store",
        dest="referenceFilename",
        type=canonicalizedFilePath,
        required=True,
        help="The filename of the reference FASTA file")
    basics.add_argument(
        "-o", "--outputFilename",
        dest="outputFilenames",
        required=True,
        type=str,
        action="append",
        default=[],
        help="The output filename(s), as a comma-separated list." + \
             "Valid output formats are .fa/.fasta, .fq/.fastq, .gff, .vcf")

    parallelism = parser.add_argument_group("Parallelism")
    parallelism.add_argument(
        "-j", "--numWorkers",
        dest="numWorkers",
        type=int,
        default=1,
        help="The number of worker processes to be used")

    filtering = parser.add_argument_group("Output filtering")
    filtering.add_argument(
        "--minConfidence", "-q",
        action="store",
        dest="minConfidence",
        type=int,
        default=Constants.DEFAULT_MIN_CONFIDENCE,
        help="The minimum confidence for a variant call to be output to variants.{gff,vcf}")
    filtering.add_argument(
        "--minCoverage", "-x",
        action="store",
        dest="minCoverage",
        default=Constants.DEFAULT_MIN_COVERAGE,
        type=int,
        help="The minimum site coverage that must be achieved for variant calls and " + \
             "consensus to be calculated for a site.")
    filtering.add_argument(
        "--noEvidenceConsensusCall",
        action="store",
        choices=["nocall", "reference", "lowercasereference"],
        default="lowercasereference",
        help="The consensus base that will be output for sites with no effective coverage.")


    readSelection = parser.add_argument_group("Read selection/filtering")
    readSelection.add_argument(
        "--coverage", "-X",
        action="store",
        dest="coverage",
        type=int,
        default=Constants.DEFAULT_MAX_COVERAGE,
        help="A designation of the maximum coverage level to be used for analysis." + \
             " Exact interpretation is algorithm-specific.")
    readSelection.add_argument(
        "--minMapQV", "-m",
        action="store",
        dest="minMapQV",
        type=float,
        default=Constants.DEFAULT_MIN_MAPQV,
        help="The minimum MapQV for reads that will be used for analysis.")
    # Since the reference isn't loaded at options processing time, we
    # can't grok the referenceWindow specified until later.  We store
    # it as a string (referenceWindowsAsString) and it will later be
    # interpreted and stored as a proper window tuple (referenceWindow)
    readSelection.add_argument(
        "--referenceWindow", "--referenceWindows", "-w",
        action="store",
        dest="referenceWindowsAsString",
        type=str,
        help="The window (or multiple comma-delimited windows) of the reference to " + \
             "be processed, in the format refGroup:refStart-refEnd "                 + \
             "(default: entire reference).",
        default=None)

    readSelection.add_argument(
        "--alignmentSetRefWindows",
        action="store_true",
        dest="referenceWindowsFromAlignment",
        help="The window (or multiple comma-delimited windows) of the reference to " + \
             "be processed, in the format refGroup:refStart-refEnd "                 + \
             "will be pulled from the alignment file.",
        default=False)

    # Shares its dest with -w: the file's contents are converted to the same
    # comma-delimited window string by slurpWindowFile.
    readSelection.add_argument(
        "--referenceWindowsFile", "-W",
        action="store",
        dest="referenceWindowsAsString",
        type=slurpWindowFile,
        help="A file containing reference window designations, one per line",
        default=None)
    readSelection.add_argument(
        "--barcode",
        type=str,
        dest="_barcode",
        help="Only process reads with the given barcode name.")
    readSelection.add_argument(
        "--readStratum",
        help="A string of the form 'n/N', where n, and N are integers, 0 <= n < N, designating" \
             " that the reads are to be deterministically split into N strata of roughly even"  \
             " size, and stratum n is to be used for variant and consensus calling.  This is"   \
             " mostly useful for Quiver development.",
        dest="readStratum",
        default=None,
        type=parseReadStratum)
    readSelection.add_argument(
        "--minReadScore",
        action="store",
        dest="minReadScore",
        type=float,
        default=Constants.DEFAULT_MIN_READSCORE,
        help="The minimum ReadScore for reads that will be used for analysis (arrow-only).")
    readSelection.add_argument(
        "--minSnr",
        action="store",
        dest="minHqRegionSnr",
        type=float,
        default=Constants.DEFAULT_MIN_HQREGIONSNR,
        help="The minimum acceptable signal-to-noise over all channels for reads that will be used for analysis (arrow-only).")
    readSelection.add_argument(
        "--minZScore",
        action="store",
        dest="minZScore",
        type=float,
        default=Constants.DEFAULT_MIN_ZSCORE,
        help="The minimum acceptable z-score for reads that will be used for analysis (arrow-only).")
    readSelection.add_argument(
        "--minAccuracy",
        action="store",
        dest="minAccuracy",
        type=float,
        default=Constants.DEFAULT_MIN_ACCURACY,
        help="The minimum acceptable window-global alignment accuracy for reads that will be used for the analysis (arrow-only).")

    algorithm = parser.add_argument_group("Algorithm and parameter settings")
    algorithm.add_argument(
        "--algorithm",
        action="store",
        dest="algorithm",
        type=str,
        choices=Constants.ALGORITHM_CHOICES,
        default=Constants.DEFAULT_ALGORITHM)
    algorithm.add_argument(
        "--parametersFile", "-P",
        dest="parametersFile",
        type=str,
        default=None,
        help="Parameter set filename (such as ArrowParameters.json or "       + \
             "QuiverParameters.ini), or directory D such that either "        + \
             "D/*/GenomicConsensus/QuiverParameters.ini, "                    + \
             "or D/GenomicConsensus/QuiverParameters.ini, is found.  In the " + \
             "former case, the lexically largest path is chosen.")
    algorithm.add_argument(
        "--parametersSpec", "-p",
        action="store",
        dest="parametersSpec",
        type=str,
        default="auto",
        help="Name of parameter set (chemistry.model) to select from the "   + \
             "parameters file, or just the name of the chemistry, in which " + \
             "case the best available model is chosen.  Default is 'auto', " + \
             "which selects the best parameter set from the alignment data")
    algorithm.add_argument(
        "--maskRadius",
        dest="maskRadius",
        type=int,
        default=Constants.DEFAULT_MASK_RADIUS,
        help="Radius of window to use when excluding local regions for " + \
             "exceeding maskMinErrorRate, where 0 disables any filtering (arrow-only).")
    algorithm.add_argument(
        "--maskErrorRate",
        dest="maskErrorRate",
        type=float,
        default=Constants.DEFAULT_MASK_ERROR_RATE,
        help="Maximum local error rate before the local region defined by " + \
             "maskRadius is excluded from polishing (arrow-only).")

    debugging = parser.add_argument_group("Verbosity and debugging/profiling")
    add_debug_option(debugging)
    debugging.add_argument(
        "--notrace",
        action="store_true",
        dest="notrace",
        default=False,
        help="Suppress stacktrace for exceptions (to simplify testing)")
    debugging.add_argument(
        "--pdbAtStartup",
        action="store_true",
        dest="pdbAtStartup",
        default=False,
        help="Drop into Python debugger at startup (requires ipdb)")
    debugging.add_argument(
        "--profile",
        action="store_true",
        dest="doProfiling",
        default=False,
        help="Enable Python-level profiling (using cProfile).")
    # With nargs="?", a bare --dumpEvidence stores the const ("variants").
    debugging.add_argument(
        "--dumpEvidence", "-d",
        dest="dumpEvidence",
        nargs="?",
        default=None,
        const="variants",
        choices=["variants", "all", "outliers"])
    debugging.add_argument(
        "--evidenceDirectory",
        default="evidence_dump")
    debugging.add_argument(
        "--annotateGFF",
        action="store_true",
        help="Augment GFF variant records with additional information")
    debugging.add_argument(
        "--reportEffectiveCoverage",
        action="store_true",
        help="Additionally record the *post-filtering* coverage at variant sites")

    advanced = parser.add_argument_group("Advanced configuration options")
    advanced.add_argument(
        "--diploid",
        action="store_true",
        help="Enable detection of heterozygous variants (experimental)")
    advanced.add_argument(
        "--queueSize", "-Q",
        action="store",
        dest="queueSize",
        type=int,
        default=200)
    advanced.add_argument(
        "--threaded", "-T",
        action="store_true",
        dest="threaded",
        default=False,
        help="Run threads instead of processes (for debugging purposes only)")
    advanced.add_argument(
        "--referenceChunkSize", "-C",
        action="store",
        dest="referenceChunkSize",
        type=int,
        default=500)
    # --fancyChunking already defaults to True; --simpleChunking writes False
    # to the same dest to turn it off.
    advanced.add_argument(
        "--fancyChunking",
        default=True,
        action="store_true",
        help="Adaptive reference chunking designed to handle coverage cutouts better")
    advanced.add_argument(
        "--simpleChunking",
        dest="fancyChunking",
        action="store_false",
        help="Disable adaptive reference chunking")
    advanced.add_argument(
        "--referenceChunkOverlap",
        action="store",
        dest="referenceChunkOverlap",
        type=int,
        default=5)
    advanced.add_argument(
        "--autoDisableHdf5ChunkCache",
        action="store",
        type=int,
        default=500,
        help="Disable the HDF5 chunk cache when the number of datasets in the cmp.h5 " + \
             "exceeds the given threshold")
    advanced.add_argument(
        "--aligner", "-a",
        action="store",
        choices=["affine", "simple"],
        default="affine",
        help="The pairwise alignment algorithm that will be used to produce variant calls" \
             " from the consensus (Quiver only).")
    # The two flags below share one dest; its default (True) is set
    # explicitly via set_defaults after both are registered.
    advanced.add_argument(
        "--refineDinucleotideRepeats",
        dest="refineDinucleotideRepeats",
        action="store_true",
        help="Require quiver maximum likelihood search to try one less/more repeat copy in"  \
             " dinucleotide repeats, which seem to be the most frequent cause of suboptimal" \
             " convergence (getting trapped in local optimum) (Quiver only)")
    advanced.add_argument(
        "--noRefineDinucleotideRepeats",
        dest="refineDinucleotideRepeats",
        action="store_false",
        help="Disable dinucleotide refinement")
    advanced.set_defaults(refineDinucleotideRepeats=True)
    advanced.add_argument(
        "--fast",
        dest="fastMode",
        action="store_true",
        help="Cut some corners to run faster.  Unsupported!")
    advanced.add_argument(
        "--skipUnrecognizedContigs",
        action="store_true",
        help="Do not abort when told to process a reference window (via -w/--referenceWindow[s]) " \
             "that has no aligned coverage.  Outputs emptyish files if there are no remaining "    \
             "non-degenerate windows.  Only intended for use by smrtpipe scatter/gather.")

    return parser
def _example_parser():
    """Build the mock example parser: the default argparser with the debug
    option added and a single positional ``example_file`` argument."""
    base_parser = get_default_argparser("1.0.0", "Example Mock Parser")
    # add_debug_option's return value is what the remaining calls operate on.
    parser_with_debug = CU.add_debug_option(base_parser)
    parser_with_debug.add_argument(
        'example_file',
        help="No testing of existence",
        type=str)
    return parser_with_debug
Exemple #12
0
def add_args_run_diagnstic(p):
    """Apply the preset-XML, debug, output-dir and simple-mode option
    helpers to ``p`` (in that order) and return ``p``."""
    option_adders = (
        _add_required_preset_xml_option,
        add_debug_option,
        _add_output_dir_option,
        _add_simple_mode_option,
    )
    for add_option in option_adders:
        # Return values are deliberately ignored, as only ``p`` is returned.
        add_option(p)
    return p