class Params(define.Params):
    """Options controlling which reads and bases contribute to coverage.

    Every description starts with a bracketed upper-case tag such as
    ``[COVERAGE_CAP]``; presumably this is the option name of the wrapped
    tool -- confirm against the code that consumes these definitions.
    """

    # NOTE: the attribute identifier keeps its historical misspelling
    # ("Minimun") because other code may look the parameter up by this name;
    # only the user-facing label is corrected.
    Minimunmappingquality = define.integer(
        name="Minimum mapping quality",
        default=20,
        description=
        "[MINIMUM_MAPPING_QUALITY]Minimum mapping quality for a read to contribute coverage. Default value: 20. This option can be set to 'null' to clear the default value."
    )
    Minimumbasequality = define.integer(
        name="Minimum base quality",
        default=20,
        description=
        "[MINIMUM_BASE_QUALITY]Minimum base quality for a base to contribute coverage. Default value: 20. This option can be set to 'null' to clear the default value."
    )
    Coveragecap = define.integer(
        name="Coverage cap",
        default=250,
        description=
        "[COVERAGE_CAP]Treat bases with coverage exceeding this value as if they had coverage at this value. Default value: 250. This option can be set to 'null' to clear the default value."
    )
    # NOTE(review): declared as a real although the description speaks of a
    # count of bases and the default is the integer -1 -- confirm the
    # intended type before changing it.
    Stopafter = define.real(
        name="Stop after",
        default=-1,
        description=
        "[STOP_AFTER]For debugging purposes, stop after processing this many genomic bases. Default value: -1. This option can be set to 'null' to clear the default value."
    )
    Validationstringency = define.enum(
        name="Validation stringency",
        default="SILENT",
        values=[('STRICT', 'STRICT', ''), ('LENIENT', 'LENIENT', ''),
                ('SILENT', 'SILENT', '')],
        description=
        "[VALIDATION_STRINGENCY]Validation stringency for all BAM/SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded."
    )
Example #2
0
 class Params(define.Params):
     """Parameter declarations for a metrics-collection step.

     Long help texts are written as adjacent string literals; Python joins
     them at compile time, so each description is a single string.
     """

     # Insert-size threshold above which a pair is treated as chimeric.
     Maximuminsertsize = define.integer(
         name="Maximum insert size",
         default=100000,
         description=(
             "[MAX_INSERT_SIZE]Paired end reads above this insert size will be "
             "considered chimeric along with inter-chromosomal pairs. "
             "[Default: 100000]."
         ),
     )

     # Free-text list of adapter sequences (comma or space separated).
     Adaptersequence = define.string(
         name="Adapter sequence",
         description=(
             "[ADAPTER_SEQUENCE]List of adapter sequences to use when processing "
             "the alignment metrics This option may be specified 0 or more times. "
             "Separate each by comma or space."
         ),
     )

     # Free-text list of accumulation levels (comma or space separated).
     Metricaccumulationlevel = define.string(
         name="Metric accumulation level",
         description=(
             "[METRIC_ACCUMULATION_LEVEL]The level(s) at which to accumulate "
             "metrics. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} "
             "This option may be specified 0 or more times. "
             "Separate each by comma or space."
         ),
     )

     # One of STRICT / LENIENT / SILENT; SILENT is preselected.
     Validationstringency = define.enum(
         name="Validation stringency",
         default="SILENT",
         values=[
             ("STRICT", "STRICT", ""),
             ("LENIENT", "LENIENT", ""),
             ("SILENT", "SILENT", ""),
         ],
         description=(
             "[VALIDATION_STRINGENCY]Validation stringency for all BAM/SAM files "
             "read by this program. Setting stringency to SILENT can improve "
             "performance when processing a BAM file in which variable-length "
             "data (read, qualities, tags) do not otherwise need to be decoded."
         ),
     )

     Isbisulfitesequenced = define.boolean(
         name="Is bisulfite sequenced",
         default=False,
         description=(
             "[IS_BISULFITE_SEQUENCED]Whether the SAM or BAM file consists of "
             "bisulfite sequenced reads. [Default: false]."
         ),
     )

     Assumesorted = define.boolean(
         name="Assume sorted",
         default=True,
         description=(
             "[ASSUME_SORTED]If true (default), then the sort order in the header "
             "file will be ignored. [Default: true]."
         ),
     )

     Compressionlevel = define.integer(
         name="Compression level",
         default=5,
         description=(
             "[COMPRESSION_LEVEL]Compression level for all compressed files "
             "created (e.g. BAM and GELI)"
         ),
     )

     CreateIndex = define.boolean(
         name="Create Index",
         default=True,
         description=(
             "[CREATE_INDEX]Whether to create a BAM index when writing a "
             "coordinate-sorted BAM file"
         ),
     )
 class Params(define.Params):
     """Parameter declarations for merging BAM/SAM inputs.

     Some descriptions carry a bracketed tag (e.g. ``[SORT_ORDER]``) and
     some do not; the untagged ones are left as found because SOURCE does
     not show how the tag is consumed.
     """

     # The declared default is True, so the help text states "true"; it
     # previously claimed "[Default: false]", contradicting the declaration.
     Assumesorted = define.boolean(
         name="Assume sorted",
         default=True,
         description=
         "[ASSUME_SORTED]If true, assume that the input files are in the same sort order as the requested output sort order, even if their headers say otherwise. [Default: true]"
     )
     Sortorder = define.enum(
         name="Sort order",
         default="coordinate",
         values=[('unsorted', 'unsorted', ''), ('queryname', 'queryname', ''),
                 ('coordinate', 'coordinate', '')],
         description="[SORT_ORDER]Desired sort order. [default: coordinate]")
     Validationstringency = define.enum(
         name="Validation stringency",
         default="SILENT",
         values=[('STRICT', 'STRICT', ''), ('LENIENT', 'LENIENT', ''),
                 ('SILENT', 'SILENT', '')],
         description=
         "Validation stringency for all BAM/SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded."
     )
     MergeSequenceDictionary = define.boolean(
         name="Merge Sequence Dictionary",
         default=False,
         description=
         "[MERGE_SEQUENCE_DICTIONARIES] Merge the sequence dictionaries")
     Compressionlevel = define.integer(
         name="Compression level",
         default=5,
         description=
         "Compression level for all compressed files created (e.g. BAM and GELI)"
     )
     CreateIndex = define.boolean(
         name="Create Index",
         default=True,
         description=
         "Whether to create a BAM index when writing a coordinate-sorted BAM file"
     )
    class Params(define.Params):
        """Parameter declarations for a variant-selection step.

        The fields up to ``SelectTypeToInclude`` describe sample and variant
        selection; the fields after it are general options whose descriptions
        refer to the GATK engine (read filtering, downsampling, intervals,
        runtime limits, and so on).

        Most descriptions begin with a bracketed tag holding a command-line
        flag (e.g. ``[-sn]``); presumably this is the flag the option maps
        to -- confirm against the code that consumes these definitions.
        """

        # ---- sample / variant selection ----
        AllowNonoverlappingCommandLineSamples = define.boolean(
            name="Allow Nonoverlapping Command Line Samples",
            default=False,
            description=
            "[--ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES]Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored."
        )
        ExcludeSampleName = define.string(
            name="Exclude Sample Name",
            description=
            "[--exclude_sample_name] Exclude genotypes from this sample. Can be specified multiple times"
        )
        ExcludeFiltered = define.boolean(
            name="Exclude Filtered",
            default=False,
            description="[-ef]Don't include filtered loci in the analysis")
        ExcludeNonVariants = define.boolean(
            name="Exclude Non Variants",
            default=False,
            description=
            "[-env]Don't include loci found to be non-variant after the subsetting procedure"
        )
        KeepOriginalAc = define.boolean(
            name="Keep Original Ac",
            default=False,
            description=
            "[--keepOriginalAC]Store the original AC, AF, and AN values in the INFO field after selecting (using keys AC_Orig, AF_Orig, and AN_Orig)"
        )
        # Default is the largest signed 32-bit integer, i.e. effectively no
        # limit on indel size.
        MaxIndelSize = define.integer(
            name="Max Indel Size",
            default=2147483647,
            description="[--maxIndelSize]indel size select")
        MendelianViolation = define.boolean(
            name="Mendelian Violation",
            default=False,
            description="[-mv]output mendelian violation sites only")
        Mvq = define.real(
            name="Mvq",
            default=0,
            description=
            "[-mvq]Minimum genotype QUAL score for each trio member required to accept a site as a violation"
        )
        Regenotype = define.boolean(
            name="Regenotype",
            default=False,
            description=
            "[-regenotype]re-genotype the selected samples based on their GLs (or PLs)"
        )
        RemoveFractionGenotypes = define.real(
            name="Remove Fraction Genotypes",
            default=0,
            description=
            "[-fractionGenotypes]Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall"
        )
        RestrictAllelesTo = define.enum(
            name="Restrict Alleles To",
            default="ALL",
            values=[('ALL', 'ALL', ''), ('MULTIALLELIC', 'MULTIALLELIC', ''),
                    ('BIALLELIC', 'BIALLELIC', '')],
            description=
            "[--restrictAllelesTo]Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC"
        )
        SampleExpressions = define.string(
            name="Sample Expressions",
            description=
            "[-se]Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times"
        )
        SampleName = define.string(
            name="Sample Name",
            description=
            "[-sn]Include genotypes from this sample. Can be specified multiple times"
        )
        SelectExpressions = define.string(
            name="Select Expressions",
            description=
            "[-select]One or more criteria to use when selecting the data")
        SelectRandomFraction = define.real(
            name="Select Random Fraction",
            default=0,
            description=
            "[-fraction]Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track"
        )
        SelectTypeToInclude = define.string(
            name="Select Type To Include",
            description=
            "[-selectType] Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. List them out and separate by comma or space"
        )

        # ---- general engine options ----
        # NOTE(review): this tag is identical to the one on
        # NonDeterministicRandomSeed below ("[-ndrs]"); one of the two is
        # probably wrong -- verify against the tool's argument reference.
        DisableRandomization = define.boolean(
            name="Disable Randomization",
            default=False,
            description=
            "[-ndrs]Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator."
        )
        AllowPotentiallyMisencodedQuals = define.boolean(
            name="Allow Potentially Misencoded Quals",
            default=False,
            description=
            "[-allowPotentiallyMisencodedQuals] Do not fail when encountered base qualities that are too high and seemingly indicate a problem with the base quality encoding of the BAM file."
        )
        BAQCalculationType = define.enum(
            name="BAQ Calculation Type",
            default="OFF",
            values=[('OFF', 'OFF', ''),
                    ('CALCULATE_AS_NECESSARY', 'CALCULATE_AS_NECESSARY', ''),
                    ('RECALCULATE', 'RECALCULATE', '')],
            description="[-baq]Type of BAQ calculation to apply in the engine."
        )
        BAQGapOpenPenalty = define.real(
            name="BAQ Gap Open Penalty",
            default=40,
            description=
            "[-baqGOP]BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets"
        )
        # NOTE(review): no flag tag in this description, unlike most
        # siblings; left as found because SOURCE does not show whether the
        # omission is deliberate.
        DefaultBaseQualities = define.integer(
            name="Default Base Qualities",
            default=-1,
            description=
            "If reads are missing some or all base quality scores, this value will be used for all base quality scores"
        )
        # The tag used to read "[-DBQ]", which is the short flag of the
        # default-base-qualities option; the disable-indel-quals flag is -DIQ.
        DisableIndelQuals = define.boolean(
            name="Disable Indel Quals",
            default=False,
            description=
            "[-DIQ]If 'true', disables printing of base insertion and base deletion tags (with -BQSR). Turns off printing of the base insertion and base deletion tags when using the -BQSR argument and only the base substitution qualities will be produced."
        )
        DownsampletoCoverage = define.integer(
            name="Downsample to Coverage",
            description=
            "[-dcov]Coverage to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position."
        )
        DownsampletoFraction = define.real(
            name="Downsample to Fraction",
            description="[-dfrac]Fraction [0.0-1.0] of reads to downsample to")
        # The literal string "null" is itself one of the selectable values
        # and is the preselected one.
        DownsamplingType = define.enum(
            name="Downsampling Type",
            default="null",
            values=[('NONE', 'NONE', ''), ('ALL_READS', 'ALL_READS', ''),
                    ('BY_SAMPLE', 'BY_SAMPLE', ''), ('null', 'null', '')],
            description=
            "[-dt]Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here"
        )
        EmitOriginalQuals = define.boolean(
            name="Emit Original Quals",
            default=False,
            description=
            "[-EOQ]If true, enables printing of the OQ tag with the original base qualities (with -BQSR)"
        )
        FixMisencodedQuals = define.boolean(
            name="Fix Misencoded Quals",
            default=False,
            description=
            "[-fixMisencodedQuals]Fix mis-encoded base quality scores")
        IntervalMerging = define.enum(
            name="Interval Merging",
            default="ALL",
            values=[('ALL', 'ALL', ''),
                    ('OVERLAPPING_ONLY', 'OVERLAPPING_ONLY', '')],
            description=
            "[-im]Indicates the interval merging rule we should use for abutting intervals"
        )
        IntervalPadding = define.integer(
            name="Interval Padding",
            default=0,
            description=
            "[-ip]Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument"
        )
        IntervalSetRule = define.enum(
            name="Interval Set Rule",
            default="UNION",
            values=[('UNION', 'UNION', ''),
                    ('INTERSECTION', 'INTERSECTION', '')],
            description=
            "[-isr]Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs"
        )
        KeepProgramRecords = define.boolean(
            name="Keep Program Records",
            default=False,
            description=
            "[-kpr]Should we override the Walker's default and keep program records from the SAM header"
        )
        MaxRuntime = define.integer(
            name="Max Runtime",
            default=-1,
            description=
            "[-maxRuntime]If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure.  By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits"
        )
        MaxRuntimeUnits = define.enum(
            name="Max Runtime Units",
            default="MINUTES",
            values=[('NANOSECONDS', 'NANOSECONDS', ''),
                    ('MICROSECONDS', 'MICROSECONDS', ''),
                    ('MILLISECONDS', 'MILLISECONDS', ''),
                    ('SECONDS', 'SECONDS', ''), ('MINUTES', 'MINUTES', ''),
                    ('HOURS', 'HOURS', ''), ('DAYS', 'DAYS', '')],
            description="[-maxRuntimeUnits] The TimeUnit for maxRuntime")
        NonDeterministicRandomSeed = define.boolean(
            name="Non Deterministic Random Seed",
            default=False,
            description=
            "[-ndrs]Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run"
        )
        PedigreeString = define.string(
            name="Pedigree String",
            description="[-pedString]Pedigree string for samples")
        PedigreeValidationType = define.enum(
            name="Pedigree Validation Type",
            default="STRICT",
            values=[('STRICT', 'STRICT', ''), ('SILENT', 'SILENT', '')],
            description=
            "[-pedValidationType]How strict should we be in validating the pedigree information?"
        )
        PhoneHome = define.enum(
            name="Phone Home",
            default="STANDARD",
            values=[('NO_ET', 'NO_ET', ''), ('STANDARD', 'STANDARD', '')],
            description=
            "[-et]What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest for details."
        )
        PreserveQscoresLessThan = define.integer(
            name="Preserve Qscores Less Than",
            default=6,
            description=
            "[-preserveQ]Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)"
        )
        ReadFilter = define.string(
            name="Read Filter",
            default='BadCigar',
            description=
            "[-rf]Specify filtration criteria to apply to each read individually"
        )
        ReadGroupBlackList = define.string(
            name="Read Group Black List",
            description=
            "[-rgbl]Filters out read groups matching : or a .txt file containing the filter strings one per line."
        )
        RemoveProgramRecords = define.boolean(
            name="Remove Program Records",
            default=False,
            description=
            "[-rpr]Should we override the Walker's default and remove program records from the SAM header"
        )
        Tag = define.string(
            name="Tag",
            description=
            "[-tag]Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis"
        )
        # As with DownsamplingType, the string "null" is a selectable value
        # and the preselected one.
        Unsafe = define.enum(
            name="Unsafe",
            default="null",
            values=[('ALLOW_UNINDEXED_BAM', 'ALLOW_UNINDEXED_BAM', ''),
                    ('ALLOW_UNSET_BAM_SORT_ORDER',
                     'ALLOW_UNSET_BAM_SORT_ORDER', ''),
                    ('NO_READ_ORDER_VERIFICATION',
                     'NO_READ_ORDER_VERIFICATION', ''),
                    ('ALLOW_SEQ_DICT_INCOMPATIBILITY',
                     'ALLOW_SEQ_DICT_INCOMPATIBILITY', ''),
                    ('LENIENT_VCF_PROCESSING', 'LENIENT_VCF_PROCESSING', ''),
                    ('ALL', 'ALL', ''), ('null', 'null', '')],
            description=
            "[-U]If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument."
        )
        # NOTE(review): no flag tag in this description either; left as found.
        UseLegacyDownsampler = define.boolean(
            name="Use Legacy Downsampler",
            default=False,
            description=
            "Use the legacy downsampling implementation instead of the newer, less-tested implementation"
        )
        UseOriginalQualities = define.boolean(
            name="Use Original Qualities",
            default=False,
            description=
            "[-OQ]If set, use the original base quality scores from the OQ tag when present instead of the standard scores"
        )
        ValidationStrictness = define.enum(
            name="Validation Strictness",
            default="SILENT",
            values=[('SILENT', 'SILENT', ''), ('LENIENT', 'LENIENT', ''),
                    ('STRICT', 'STRICT', '')],
            description="[-S]How strict should we be with validation")
Example #5
0
    class Params(define.Params):
        """Parameter declarations for a read-alignment step.

        The first group of fields carries descriptions tagged with a
        single-letter flag (e.g. ``[-k]``) and several mention BWA-MEM; the
        trailing group (``Outputformat`` onwards) has untagged descriptions
        concerning output format, indexing, secondary-alignment filtering and
        duplicate handling.

        Several pairs of fields share one tag (``-O``, ``-E``, ``-L``); the
        pairs are deletion/insertion and 5'/3' variants of the same setting.
        """

        # ---- seeding and chaining ----
        Minimumseedlength = define.integer(
            name="Minimum seed length",
            default=19,
            description=
            "[-k]Matches shorter than INT will be missed. The alignment speed is usually insensitive to this value unless it significantly deviates 20. [default: 19]"
        )
        Bandwidthforbandedalignment = define.integer(
            name="Band width for banded alignment",
            default=100,
            description="[-w]Band width in the banded alignment [default: 100]"
        )
        OffdiagonalXdropoff = define.integer(
            name="Off-diagonal X-dropoff",
            default=100,
            description=
            "[-d]Stop extension when the difference between the best and the current extension score is above |i-j|*A+INT, where i and j are the current positions of the query and reference, respectively, and A is the matching score. Z-dropoff not only avoids unnecessary extension, but also reduces poor alignments inside a long good alignment. [default: 100]"
        )
        TriggerreseedingforaMEMlongerthanminSeedLenFLOAT = define.real(
            name="Trigger re-seeding for a MEM longer than minSeedLen*FLOAT",
            default=1.5,
            description=
            "[-r]This is a key heuristic parameter for tuning the performance. Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy. [default: 1.5]"
        )
        SkipseedswithmorethanINToccurrences = define.integer(
            name="Skip seeds with more than INT occurrences",
            default=500,
            description=
            "[-c]Discard a MEM if it has more than INT occurrences in the genome. This is an insensitive parameter. [default: 500]"
        )
        Dropchainfraction = define.real(
            name="Drop chain fraction",
            default=0.5,
            description=
            "[-D]Drop chains shorter than FLOAT fraction of the longest overlapping chain."
        )
        Dropchainlength = define.integer(
            name="Drop chain length",
            default=0,
            description="[-W]Discard a chain if seeded bases shorter than INT."
        )

        # ---- mate rescue / pairing ----
        Materescuerounds = define.integer(
            name="Mate rescue rounds",
            default=50,
            description=
            "[-m] Perform at most INT rounds of mate rescues for each read.")
        Skipmaterescue = define.boolean(name="Skip mate rescue",
                                        default=False,
                                        description="[-S] Skip mate rescue")
        SkippairingmaterescueperformedunlessSalsoinuse = define.boolean(
            name="Skip pairing; mate rescue performed unless -S also in use",
            default=False,
            description=
            "[-P] In the paired-end mode, perform SW to rescue missing hits only but do not try to find hits that fit a proper pair."
        )
        Discardexactmatches = define.boolean(
            name="Discard exact matches",
            default=False,
            description="[-e] Discard full-length exact matches")
        # The string 'None' is a selectable value and the preselected one.
        Readtype = define.enum(
            name="Read type",
            default="None",
            values=[('None', 'None', ''), ('pacbio', 'pacbio', ''),
                    ('pbread', 'pbread', '')],
            description=
            "[-x] Read type. Setting -x changes multiple parameters unless overridden pacbio: -k17 -W40 -c1000 -r10 -A2 -B5 -O2 -E1 -L0; pbread: -k13 -W40 -c1000 -r10 -A2 -B5 -O2 -E1 -N25 -FeaD.001"
        )

        # ---- scoring ----
        Scoreforasequencematch = define.integer(
            name="Score for a sequence match",
            default=1,
            description="[-A] Score for a sequence match. [default: 1]")
        Penaltyforamismatch = define.integer(
            name="Penalty for a mismatch",
            default=4,
            description="[-B] Penalty for a mismatch. [default: 4]")
        Gapopenpenaltyfordeletions = define.integer(
            name="Gap open penalty for deletions",
            default=6,
            description="[-O] Gap open penalty for deletions [default: 6]")
        Gapopenpenaltyforinsertions = define.integer(
            name="Gap open penalty for insertions",
            default=6,
            description="[-O] Gap open penalty for insertions [default: 6]")
        Gapextensionpenaltyfordeletion = define.integer(
            name="Gap extension penalty for deletion",
            default=1,
            description=
            "[-E] Gap extension penalty for deletion. A gap of length k costs O + k*E (i.e. -O is for opening a zero-length gap). [default: 1]"
        )
        Gapextensionpenaltyforinsertion = define.integer(
            name="Gap extension penalty for insertion",
            default=1,
            description=
            "[-E] Gap extension penalty for insertion. A gap of length k costs O + k*E (i.e. -O is for opening a zero-length gap). [default: 1]"
        )
        Penaltyfor5endclipping = define.integer(
            name="Penalty for 5'-end clipping",
            default=5,
            description=
            "[-L] When performing SW extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best SW score minus the clipping penalty, clipping will not be applied. Note that in this case, the SAM AS tag reports the best SW score; clipping penalty is not deducted. [default: 5]"
        )
        Penaltyfor3endclipping = define.integer(
            name="Penalty for 3'-end clipping",
            default=5,
            description=
            "[-L] When performing SW extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best SW score minus the clipping penalty, clipping will not be applied. Note that in this case, the SAM AS tag reports the best SW score; clipping penalty is not deducted. [default: 5]"
        )
        Penaltyforanunpairedreadpair = define.integer(
            name="Penalty for an unpaired read pair",
            default=17,
            description=
            "[-U] BWA-MEM scores an unpaired read pair as scoreRead1+scoreRead2-INT and scores a paired as scoreRead1+scoreRead2-insertPenalty. It compares these two scores to determine whether we should force pairing. [default: 17]"
        )

        # ---- input / output behaviour of the aligner ----
        # NOTE(review): no flag tag in this description, unlike its
        # neighbours; left as found.
        Firstqueryfileconsistsofinterleavedpairedendsequences = define.boolean(
            name=
            "First query file consists of interleaved paired-end sequences",
            default=False,
            description=
            "Assume the first input query file is interleaved paired-end FASTA/Q. See the command description for details."
        )
        XAtag = define.integer(
            name="XA tag",
            default=5,
            description="[-h]If #hits < INT, output all in the XA tag")
        Scorethreshold = define.integer(
            name="Score threshold",
            default=30,
            description="[-T]Minimum score to output [default: 30]")
        OutputallalignmentsforSEorunpairedPE = define.boolean(
            name="Output all alignments for SE or unpaired PE",
            default=False,
            description=
            "[-a]Output all found alignments for single-end or unpaired paired-end reads. These alignments will be flagged as secondary alignments."
        )
        # The attribute identifier keeps its doubled "append" for
        # compatibility; only the displayed label and help text are corrected.
        AppendappendFASTAQcommenttoSAMoutput = define.boolean(
            name="Append FASTA/Q comment to SAM output",
            default=False,
            description=
            "[-C]This option can be used to transfer read meta information (e.g. barcode) to the SAM output. Note that the FASTA/Q comment (the string after a space in the header line) must conform the SAM spec (e.g. BC:Z:CGTAC). Malformed comments lead to incorrect SAM output."
        )
        Usesoftclippingforsupplementaryalignments = define.boolean(
            name="Use soft clipping for supplementary alignments.",
            default=False,
            description="[-Y]Use soft clipping for supplementary alignments.")
        Markshortersplithitsassecondary = define.boolean(
            name="Mark shorter split hits as secondary.",
            default=True,
            description=
            "[-M]Mark shorter split hits as secondary (for Picard compatibility)."
        )
        # The backslashes are escaped so the help text shows the two
        # characters "\t" rather than embedding real TAB characters.
        Completereadgroupheaderline = define.string(
            name="Complete read group header line",
            description=
            "[-R]Specify the read group in a format like '@RG\\tID:foo\\tSM:bar'. This value takes precedence over per-attribute parameters. [default: constructed from per-attribute parameters or inferred from metadata]"
        )

        # ---- post-alignment output handling (untagged descriptions) ----
        Outputformat = define.enum(
            name="Output format",
            default="Sorted BAM",
            values=[('SAM', 'SAM', ''), ('BAM', 'BAM', ''),
                    ('Sorted BAM', 'Sorted BAM', '')],
            description=
            "Select format to output. Sorted BAM option will output coordinate sorted BAM."
        )
        CreateIndex = define.boolean(
            name="Create Index",
            default=True,
            description="Create Index for Sorted BAM file")
        Filteroutsecondaryalignments = define.boolean(
            name="Filter out secondary alignments",
            default=True,
            description=
            "Set to true to filter out secondary alignments. Works only with output format set to BAM or Sorted BAM"
        )
        Duplication = define.enum(
            name="Duplication",
            default="None",
            values=[('None', 'None', ''),
                    ('Mark Duplicates', 'Mark Duplicates', ''),
                    ('Remove duplicates', 'Remove duplicates', '')],
            description=
            "Only works for Sorted BAM output. Remove duplicates reads from all output files. Implies: Exclude reads marked as duplicates from discordant, splitter, and/or unmapped file."
        )
 class Params(define.Params):
     #ExcludeNonVariants = define.string(name="ExcludeNonVariants", description="Don't include non-variant sites.", required=True,default = "excludeNonVariants")
     #SelectType= define.string(name = "SelectType", description = "Select only a certain type of variants from the input file.Valid types are INDEL,SNP,MIXED,MNP,SYMBOLIC,NO_VARIATION.",required = True,default = 'SNP')
     SelectType = define.enum(
         [(u'SNP', u'SNP', u'SNP'), (u'INDEL', u'INDEL', u'INDEL'),
          (u'MIXED', u'MIXED', u'MIXED'), (u'MNP', u'MNP', u'MNP'),
          (u'SYMBOLIC', u'SYMBOLIC', u'SYMBOLIC'),
          (u'NO_VARIATION', u'NO_VARIATION', u'NO_VARIATION')],
         name="SelectType",
         description=
         "Select only a certain type of variants from the input file.Valid types are INDEL,SNP,MIXED,MNP,SYMBOLIC,NO_VARIATION.",
         required=True,
         default='SNP')
     # --- Known-sites resource strings -------------------------------------
     # Each default is a comma-separated tag list: a label followed by
     # known/training/truth flags and a prior.
     # NOTE(review): presumably forwarded to the recalibrator's resource
     # argument -- confirm against the wrapper code that consumes them.
     Resource_hapmap = define.string(
         name="Resource_hapmap",
         description="Hapmap sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run.",
         default='hapmap,known=false,training=true,truth=true,prior=15.0',
         required=False,
     )
     Resource_omni = define.string(
         name="Resource_omni",
         description="Omni sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run).",
         default='omni,known=false,training=true,truth=true,prior=12.0',
         required=False,
     )
     Resource_1000G = define.string(
         name="Resource_1000G",
         description="1000G sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run).",
         default='1000G,known=false,training=true,truth=false,prior=10.0',
         required=False,
     )
     Resource_dbsnp = define.string(
         name="Resource_dbsnp",
         description="Dbsnp sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run).",
         default='dbsnp,known=true,training=false,truth=false,prior=2.0',
         required=False,
     )
     # The attribute is called Resource_Indel_mills while its display name is
     # "Resource_mills"; both spellings are kept exactly as they were.
     Resource_Indel_mills = define.string(
         name="Resource_mills",
         description="Mills sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run).",
         default='mills,known=true,training=true,truth=true,prior=12.0',
         required=False,
     )

     # --- Recalibration model settings -------------------------------------
     An = define.string(
         name="The names of the annotations",
         description="The names of the annotations which should used for calculations.Format:[value1],[value2],[value3],...",
         default="DP,QD,FS,SOR,MQ,MQRankSum,ReadPosRankSum",
         required=True,
     )
     MaxGaussians = define.integer(
         name="MaxGaussians",
         description="Max number of Gaussians for the positive model",
         default=8,
         required=False,
     )
     # The choice list is passed positionally here, unlike the keyword options.
     Mode = define.enum(
         [
             (u'SNP', u'SNP', u'SNP'),
             (u'INDEL', u'INDEL', u'INDEL'),
             (u'BOTH', u'BOTH', u'BOTH'),
         ],
         name="Mode",
         description="Recalibration mode to employ",
         default="SNP",
         required=True,
     )
     Tranche = define.string(
         name="Tranche",
         description="The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)",
         default="100.0,99.9,99.0,90.0",
         required=False,
     )
     Ts_filter_level = define.real(
         name="Ts_filter_level",
         description="The truth sensitivity level at which to start filtering.",
         default=99.0,
         required=False,
     )
     # Disabled option, kept for reference:
     #ExcludeFiltered = define.string(name="ExcludeFiltered",description = "Don't include filtered sites.",required = False,default = "excludeFiltered")

     # --- Output naming ----------------------------------------------------
     rename = define.string(
         name='Rename prefix',
         description="Rename the output file as [PREFIX].vcf If a metadata key is given as prefix, the output file will be renamed with the corresponding metadata value of the first input file.",
         default='',
         required=False,
     )

     # --- Free-form extra command-line text, one field per pipeline step ----
     cmd_SelectVariants = define.string(
         name="Extra options 1",
         description="Advanced command line for SelectVariants.",
         default="",
         required=False,
     )
     cmd_VariantRecalibrator = define.string(
         name="Extra options 2",
         description="Advanced command line for VariantRecalibrator.",
         default="",
         required=False,
     )
     cmd_ApplyRecalibration = define.string(
         name="Extra options 3",
         description="Advanced command line for ApplyRecalibration.",
         default="",
         required=False,
     )
     # Second SelectVariants field; its description matches "Extra options 1".
     cmd_SelectVariants1 = define.string(
         name="Extra options 4",
         description="Advanced command line for SelectVariants.",
         default="",
         required=False,
     )
Example #7
0
 class Params(define.Params):
     # Parameter declarations for a GATK-based tool wrapper. Most descriptions
     # begin with a bracketed tag such as "[-baq]", which appears to name the
     # command-line flag the option corresponds to.
     # Declaration order is kept exactly as it was.

     # --- Engine-level options ----------------------------------------------
     # NOTE(review): this description carries the same "[-ndrs]" tag as
     # NonDeterministicRandomSeed further down -- confirm which flag applies.
     DisableRandomization = define.boolean(
         name="Disable Randomization",
         description="[-ndrs]Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.",
         default=False,
     )
     AllowPotentiallyMisencodedQuals = define.boolean(
         name="Allow Potentially Misencoded Quals",
         description="[-allowPotentiallyMisencodedQuals] Do not fail when encountered base qualities that are too high and seemingly indicate a problem with the base quality encoding of the BAM file.",
         default=False,
     )
     BAQCalculationType = define.enum(
         name="BAQ Calculation Type",
         description="[-baq]Type of BAQ calculation to apply in the engine.",
         values=[
             ('OFF', 'OFF', ''),
             ('CALCULATE_AS_NECESSARY', 'CALCULATE_AS_NECESSARY', ''),
             ('RECALCULATE', 'RECALCULATE', ''),
         ],
         default="OFF",
     )
     BAQGapOpenPenalty = define.real(
         name="BAQ Gap Open Penalty",
         description="[-baqGOP]BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets",
         default=40,
     )
     # This description has no bracketed flag tag.
     DefaultBaseQualities = define.integer(
         name="Default Base Qualities",
         description="If reads are missing some or all base quality scores, this value will be used for all base quality scores",
         default=-1,
     )
     # NOTE(review): tagged "[-DBQ]", which looks like it belongs to
     # DefaultBaseQualities above rather than to this option -- confirm
     # against the GATK argument list.
     DisableIndelQuals = define.boolean(
         name="Disable Indel Quals",
         description="[-DBQ]If 'true', disables printing of base insertion and base deletion tags (with -BQSR). Turns off printing of the base insertion and base deletion tags when using the -BQSR argument and only the base substitution qualities will be produced.",
         default=False,
     )
     # The two downsampling amounts below declare no default.
     DownsampletoCoverage = define.integer(
         name="Downsample to Coverage",
         description="[-dcov]Coverage to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.",
     )
     DownsampletoFraction = define.real(
         name="Downsample to Fraction",
         description="[-dfrac]Fraction [0.0-1.0] of reads to downsample to",
     )
     # "null" is offered as an explicit choice and is the default.
     DownsamplingType = define.enum(
         name="Downsampling Type",
         description="[-dt]Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here",
         values=[
             ('NONE', 'NONE', ''),
             ('ALL_READS', 'ALL_READS', ''),
             ('BY_SAMPLE', 'BY_SAMPLE', ''),
             ('null', 'null', ''),
         ],
         default="null",
     )
     EmitOriginalQuals = define.boolean(
         name="Emit Original Quals",
         description="[-EOQ]If true, enables printing of the OQ tag with the original base qualities (with -BQSR)",
         default=False,
     )
     # Earlier boolean form of the option, superseded by the three-way enum:
     #FixMisencodedQuals= define.boolean( name = "Fix Misencoded Quals", default = False, description = "[-fixMisencodedQuals]Fix mis-encoded base quality scores")
     # Choices and default are the strings 'True'/'False'/'Auto', not booleans.
     FixMisencodedQuals = define.enum(
         name="Fix Misencoded Quals",
         description="[-fixMisencodedQuals]Fix mis-encoded base quality scores, select Auto to auto detect and fix",
         values=[
             ('True', 'True', ''),
             ('False', 'False', ''),
             ('Auto', 'Auto', ''),
         ],
         default='False',
     )
     IntervalMerging = define.enum(
         name="Interval Merging",
         description="[-im]Indicates the interval merging rule we should use for abutting intervals",
         values=[
             ('ALL', 'ALL', ''),
             ('OVERLAPPING_ONLY', 'OVERLAPPING_ONLY', ''),
         ],
         default="ALL",
     )
     IntervalPadding = define.integer(
         name="Interval Padding",
         description="[-ip]Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument",
         default=0,
     )
     IntervalSetRule = define.enum(
         name="Interval Set Rule",
         description="[-isr]Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs",
         values=[
             ('UNION', 'UNION', ''),
             ('INTERSECTION', 'INTERSECTION', ''),
         ],
         default="UNION",
     )
     KeepProgramRecords = define.boolean(
         name="Keep Program Records",
         description="[-kpr]Should we override the Walker's default and keep program records from the SAM header",
         default=False,
     )
     MaxRuntime = define.integer(
         name="Max Runtime",
         description="[-maxRuntime]If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure.  By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits",
         default=-1,
     )
     MaxRuntimeUnits = define.enum(
         name="Max Runtime Units",
         description="[-maxRuntimeUnits] The TimeUnit for maxRuntime",
         values=[
             ('NANOSECONDS', 'NANOSECONDS', ''),
             ('MICROSECONDS', 'MICROSECONDS', ''),
             ('MILLISECONDS', 'MILLISECONDS', ''),
             ('SECONDS', 'SECONDS', ''),
             ('MINUTES', 'MINUTES', ''),
             ('HOURS', 'HOURS', ''),
             ('DAYS', 'DAYS', ''),
         ],
         default="MINUTES",
     )
     NonDeterministicRandomSeed = define.boolean(
         name="Non Deterministic Random Seed",
         description="[-ndrs]Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run",
         default=False,
     )
     PedigreeString = define.string(
         name="Pedigree String",
         description="[-pedString]Pedigree string for samples",
     )
     PedigreeValidationType = define.enum(
         name="Pedigree Validation Type",
         description="[-pedValidationType]How strict should we be in validating the pedigree information?",
         values=[
             ('STRICT', 'STRICT', ''),
             ('SILENT', 'SILENT', ''),
         ],
         default="STRICT",
     )
     PhoneHome = define.enum(
         name="Phone Home",
         description="[-et]What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest for details.",
         values=[
             ('NO_ET', 'NO_ET', ''),
             ('STANDARD', 'STANDARD', ''),
         ],
         default="STANDARD",
     )
     PreserveQscoresLessThan = define.integer(
         name="Preserve Qscores Less Than",
         description="[-preserveQ]Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)",
         default=6,
     )
     ReadFilter = define.string(
         name="Read Filter",
         description="[-rf]Specify filtration criteria to apply to each read individually",
         default='BadCigar',
     )
     ReadGroupBlackList = define.string(
         name="Read Group Black List",
         description="[-rgbl]Filters out read groups matching : or a .txt file containing the filter strings one per line.",
     )
     RemoveProgramRecords = define.boolean(
         name="Remove Program Records",
         description="[-rpr]Should we override the Walker's default and remove program records from the SAM header",
         default=False,
     )
     Tag = define.string(
         name="Tag",
         description="[-tag]Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis",
     )
     # As with DownsamplingType, "null" is a selectable value and the default.
     Unsafe = define.enum(
         name="Unsafe",
         description="[-U]If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument.",
         values=[
             ('ALLOW_UNINDEXED_BAM', 'ALLOW_UNINDEXED_BAM', ''),
             ('ALLOW_UNSET_BAM_SORT_ORDER', 'ALLOW_UNSET_BAM_SORT_ORDER', ''),
             ('NO_READ_ORDER_VERIFICATION', 'NO_READ_ORDER_VERIFICATION', ''),
             ('ALLOW_SEQ_DICT_INCOMPATIBILITY',
              'ALLOW_SEQ_DICT_INCOMPATIBILITY', ''),
             ('LENIENT_VCF_PROCESSING', 'LENIENT_VCF_PROCESSING', ''),
             ('ALL', 'ALL', ''),
             ('null', 'null', ''),
         ],
         default="null",
     )
     UseLegacyDownsampler = define.boolean(
         name="Use Legacy Downsampler",
         description="Use the legacy downsampling implementation instead of the newer, less-tested implementation",
         default=False,
     )
     UseOriginalQualities = define.boolean(
         name="Use Original Qualities",
         description="[-OQ]If set, use the original base quality scores from the OQ tag when present instead of the standard scores",
         default=False,
     )
     ValidationStrictness = define.enum(
         name="Validation Strictness",
         description="[-S]How strict should we be with validation",
         values=[
             ('SILENT', 'SILENT', ''),
             ('LENIENT', 'LENIENT', ''),
             ('STRICT', 'STRICT', ''),
         ],
         default="SILENT",
     )

     # --- Wrapper-level options ---------------------------------------------
     # Disabled grouping option, kept for reference:
     #Groupby= define.enum( name = "Group by", default = "sample", values = [( 'sample_group','sample_group','' ),( 'sample','sample','' ),( 'library','library','' ),( 'platform_unit','platform_unit','' ),( 'chunk','chunk','' ),( 'interval','interval','' )], description = "Inputs will be grouped by selected value from this category. One output will be generated for each group.")
     DivideByIntervals = define.boolean(
         name="Divide By Intervals",
         description="Divide the result by Genomic Intervals",
         default=False,
     )
     # Disabled resource options, kept for reference:
     #Memoryperjob= define.integer( name = "Memory per job", default = 0, description = "Amount of RAM memory to be used per job. Defaults to 2048MB for Single threaded jobs,and all of the available memory on the instance for multi-threaded jobs. Set to 0 for the default value")
     #Threadsperjob= define.integer( name = "Threads per job", default = 0, description = "For tools which support multiprocessing, this value can be used to set the number of threads to be used. Set to 0 for auto-detect (use with caution,as auto-detect will find the optimal value in most cases)")

     # --- Tool-specific options (interval size, entropy, clustering window) --
     Maximumintervalsize = define.integer(
         name="Maximum interval size",
         description="[-maxInterval]Maximum interval size. Because the realignment algorithm is N^2, allowing too large an interval might take too long to completely realign.",
         default=500,
     )
     Minimumreadsatlocus = define.integer(
         name="Minimum reads at locus",
         description="[-minReads]Minimum reads at a locus to enable using the entropy calculation.",
         default=4,
     )
     # NOTE(review): the text below has a stray apostrophe after USE_SW; it is
     # reproduced unchanged.
     Mismatchfraction = define.real(
         name="Mismatch fraction",
         description="[-mismatch]Fraction of base qualities needing to mismatch for a position to have high entropy. To disable this behavior, set this value to <= 0 or > 1. This feature is really only necessary when using an ungapped aligner (e.g. MAQ in the case of single-end read data) and should be used in conjunction with USE_SW' option.",
         default=0,
     )
     Windowsize = define.integer(
         name="Window size",
         description="[-window]Window size for calculating entropy or SNP clusters. Any two SNP calls and/or high entropy positions are considered clustered when they occur no more than this many base pairs apart.",
         default=10,
     )
    class Params(define.Params):
        # Parameter declarations for a GATK-based variant-calling wrapper.
        # Most descriptions begin with a bracketed tag such as "[-glm]", which
        # appears to name the command-line flag the option corresponds to.
        # Declaration order is kept exactly as it was.

        # --- Tool-specific (genotyping) options ----------------------------
        Annotation = define.string(
            name="Annotation",
            description="[-A]One or more specific annotations to apply to variant calls",
        )
        ComputeSlod = define.boolean(
            name="Compute Slod",
            description="[-slod]If provided, we will calculate the SLOD (SB annotation)",
            default=False,
        )
        Contamination = define.real(
            name="Contamination",
            description="[-contamination]Fraction of contamination in sequencing data (for all samples) to aggressively remove.",
            default=0.05,
        )
        ExcludeAnnotation = define.string(
            name="Exclude Annotation",
            description="[-XA]One or more specific annotations to exclude",
        )
        GenotypeLikelihoodsModel = define.enum(
            name="Genotype Likelihoods Model",
            description="[-glm]Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together",
            values=[
                ('SNP', 'SNP', ''),
                ('INDEL', 'INDEL', ''),
                ('GENERALPLOIDYSNP', 'GENERALPLOIDYSNP', ''),
                ('GENERALPLOIDYINDEL', 'GENERALPLOIDYINDEL', ''),
                ('BOTH', 'BOTH', ''),
            ],
            default="SNP",
        )
        GenotypingMode = define.enum(
            name="Genotyping Mode",
            description="[-gt_mode]Specifies how to determine the alternate alleles to use for genotyping",
            values=[
                ('DISCOVERY', 'DISCOVERY', ''),
                ('GENOTYPE_GIVEN_ALLELES', 'GENOTYPE_GIVEN_ALLELES', ''),
            ],
            default="DISCOVERY",
        )
        Group = define.string(
            name="Group",
            description="[-G]One or more classes/groups of annotations to apply to variant calls",
            default="Standard",
        )
        Heterozygosity = define.real(
            name="Heterozygosity",
            description="[-hets] Heterozygosity value used to compute prior likelihoods for any locus",
            default=0.001,
        )
        IgnoreLaneInfo = define.boolean(
            name="Ignore Lane Info",
            description="[-ignoreLane] Ignore lane when building error model, error model is then per-site",
            default=False,
        )
        IndelHeterozygosity = define.real(
            name="Indel Heterozygosity",
            description="[-indelHeterozygosity]Heterozygosity for indel calling",
            default=0.000125,
        )
        MaxDeletionFraction = define.real(
            name="Max Deletion Fraction",
            description="[-deletions]Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to  1; default:0.05]",
            default=0.05,
        )
        MinBaseQualityScore = define.integer(
            name="Min Base Quality Score",
            description="[-mbq]Minimum base quality required to consider a base for calling",
            default=17,
        )
        MinIndelCnt = define.integer(
            name="Min Indel Cnt",
            description="[-minIndelCnt]Minimum number of consensus indels required to trigger genotyping run",
            default=5,
        )
        MinIndelFrac = define.real(
            name="Min Indel Frac",
            description="[-minIndelFrac]Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles",
            default=0.25,
        )
        OutputMode = define.enum(
            name="Output Mode",
            description="[-out_mode]Specifies which type of calls we should output",
            values=[
                ('EMIT_VARIANTS_ONLY', 'EMIT_VARIANTS_ONLY', ''),
                ('EMIT_ALL_CONFIDENT_SITES', 'EMIT_ALL_CONFIDENT_SITES', ''),
                ('EMIT_ALL_SITES', 'EMIT_ALL_SITES', ''),
            ],
            default="EMIT_VARIANTS_ONLY",
        )
        PairHmmImplementation = define.enum(
            name="Pair Hmm Implementation",
            description="[-pairHMM]The PairHMM implementation to use for -glm INDEL genotype likelihood calculations",
            values=[
                ('EXACT', 'EXACT', ''),
                ('ORIGINAL', 'ORIGINAL', ''),
                ('CACHING', 'CACHING', ''),
                ('LOGLESS_CACHING', 'LOGLESS_CACHING', ''),
            ],
            default="ORIGINAL",
        )
        # This description has no bracketed flag tag.
        PcrErrorRate = define.real(
            name="Pcr Error Rate",
            description="The PCR error rate to be used for computing fragment-based likelihoods",
            default=0.0001,
        )
        StandCallConf = define.real(
            name="Stand Call Conf",
            description="[-stand_call_conf]The minimum phred-scaled confidence threshold at which variants should be called",
            default=30,
        )
        StandEmitConf = define.real(
            name="Stand Emit Conf",
            description="[-stand_emit_conf]The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)",
            default=30,
        )
        IndelGapContinuationPenalty = define.integer(
            name="Indel Gap Continuation Penalty",
            description="[-indelGCP]Indel gap continuation penalty, as Phred-scaled probability.  I.e., 30 => 10^-30/10",
            default=10,
        )
        IndelGapOpenPenalty = define.integer(
            name="Indel Gap Open Penalty",
            description="[-indelGOP]Indel gap open penalty, as Phred-scaled probability.  I.e., 30 => 10^-30/10",
            default=45,
        )
        MaxAlternateAlleles = define.integer(
            name="Max Alternate Alleles",
            description="[-maxAltAlleles]Maximum number of alternate alleles to genotype",
            default=6,
        )
        PNonrefModel = define.enum(
            name="P Nonref Model",
            description="[--p_nonref_model] Non-reference probability calculation model to employ",
            values=[
                ('EXACT_INDEPENDENT', 'EXACT_INDEPENDENT', ''),
                ('EXACT_REFERENCE', 'EXACT_REFERENCE', ''),
                ('EXACT_ORIGINAL', 'EXACT_ORIGINAL', ''),
                ('EXACT_GENERAL_PLOIDY', 'EXACT_GENERAL_PLOIDY', ''),
            ],
            default="EXACT_INDEPENDENT",
        )

        # --- Engine-level options ------------------------------------------
        # NOTE(review): this description carries the same "[-ndrs]" tag as
        # NonDeterministicRandomSeed further down -- confirm which flag
        # applies here.
        DisableRandomization = define.boolean(
            name="Disable Randomization",
            description="[-ndrs]Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.",
            default=False,
        )
        AllowPotentiallyMisencodedQuals = define.boolean(
            name="Allow Potentially Misencoded Quals",
            description="[-allowPotentiallyMisencodedQuals] Do not fail when encountered base qualities that are too high and seemingly indicate a problem with the base quality encoding of the BAM file.",
            default=False,
        )
        BAQCalculationType = define.enum(
            name="BAQ Calculation Type",
            description="[-baq]Type of BAQ calculation to apply in the engine.",
            values=[
                ('OFF', 'OFF', ''),
                ('CALCULATE_AS_NECESSARY', 'CALCULATE_AS_NECESSARY', ''),
                ('RECALCULATE', 'RECALCULATE', ''),
            ],
            default="OFF",
        )
        BAQGapOpenPenalty = define.real(
            name="BAQ Gap Open Penalty",
            description="[-baqGOP]BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets",
            default=40,
        )
        # This description has no bracketed flag tag.
        DefaultBaseQualities = define.integer(
            name="Default Base Qualities",
            description="If reads are missing some or all base quality scores, this value will be used for all base quality scores",
            default=-1,
        )
        # NOTE(review): tagged "[-DBQ]", which looks like it belongs to
        # DefaultBaseQualities above rather than to this option -- confirm
        # against the GATK argument list.
        DisableIndelQuals = define.boolean(
            name="Disable Indel Quals",
            description="[-DBQ]If 'true', disables printing of base insertion and base deletion tags (with -BQSR). Turns off printing of the base insertion and base deletion tags when using the -BQSR argument and only the base substitution qualities will be produced.",
            default=False,
        )
        # The two downsampling amounts below declare no default.
        DownsampletoCoverage = define.integer(
            name="Downsample to Coverage",
            description="[-dcov]Coverage to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.",
        )
        DownsampletoFraction = define.real(
            name="Downsample to Fraction",
            description="[-dfrac]Fraction [0.0-1.0] of reads to downsample to",
        )
        # "null" is offered as an explicit choice and is the default.
        DownsamplingType = define.enum(
            name="Downsampling Type",
            description="[-dt]Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here",
            values=[
                ('NONE', 'NONE', ''),
                ('ALL_READS', 'ALL_READS', ''),
                ('BY_SAMPLE', 'BY_SAMPLE', ''),
                ('null', 'null', ''),
            ],
            default="null",
        )
        EmitOriginalQuals = define.boolean(
            name="Emit Original Quals",
            description="[-EOQ]If true, enables printing of the OQ tag with the original base qualities (with -BQSR)",
            default=False,
        )
        FixMisencodedQuals = define.boolean(
            name="Fix Misencoded Quals",
            description="[-fixMisencodedQuals]Fix mis-encoded base quality scores",
            default=False,
        )
        IntervalMerging = define.enum(
            name="Interval Merging",
            description="[-im]Indicates the interval merging rule we should use for abutting intervals",
            values=[
                ('ALL', 'ALL', ''),
                ('OVERLAPPING_ONLY', 'OVERLAPPING_ONLY', ''),
            ],
            default="ALL",
        )
        IntervalPadding = define.integer(
            name="Interval Padding",
            description="[-ip]Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument",
            default=0,
        )
        IntervalSetRule = define.enum(
            name="Interval Set Rule",
            description="[-isr]Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs",
            values=[
                ('UNION', 'UNION', ''),
                ('INTERSECTION', 'INTERSECTION', ''),
            ],
            default="UNION",
        )
        KeepProgramRecords = define.boolean(
            name="Keep Program Records",
            description="[-kpr]Should we override the Walker's default and keep program records from the SAM header",
            default=False,
        )
        MaxRuntime = define.integer(
            name="Max Runtime",
            description="[-maxRuntime]If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure.  By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits",
            default=-1,
        )
        MaxRuntimeUnits = define.enum(
            name="Max Runtime Units",
            description="[-maxRuntimeUnits] The TimeUnit for maxRuntime",
            values=[
                ('NANOSECONDS', 'NANOSECONDS', ''),
                ('MICROSECONDS', 'MICROSECONDS', ''),
                ('MILLISECONDS', 'MILLISECONDS', ''),
                ('SECONDS', 'SECONDS', ''),
                ('MINUTES', 'MINUTES', ''),
                ('HOURS', 'HOURS', ''),
                ('DAYS', 'DAYS', ''),
            ],
            default="MINUTES",
        )
        NonDeterministicRandomSeed = define.boolean(
            name="Non Deterministic Random Seed",
            description="[-ndrs]Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run",
            default=False,
        )
        PedigreeString = define.string(
            name="Pedigree String",
            description="[-pedString]Pedigree string for samples",
        )
        PedigreeValidationType = define.enum(
            name="Pedigree Validation Type",
            description="[-pedValidationType]How strict should we be in validating the pedigree information?",
            values=[
                ('STRICT', 'STRICT', ''),
                ('SILENT', 'SILENT', ''),
            ],
            default="STRICT",
        )
        PhoneHome = define.enum(
            name="Phone Home",
            description="[-et]What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest for details.",
            values=[
                ('NO_ET', 'NO_ET', ''),
                ('STANDARD', 'STANDARD', ''),
            ],
            default="STANDARD",
        )
        PreserveQscoresLessThan = define.integer(
            name="Preserve Qscores Less Than",
            description="[-preserveQ]Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)",
            default=6,
        )
        ReadFilter = define.string(
            name="Read Filter",
            description="[-rf]Specify filtration criteria to apply to each read individually",
            default='BadCigar',
        )
        ReadGroupBlackList = define.string(
            name="Read Group Black List",
            description="[-rgbl]Filters out read groups matching : or a .txt file containing the filter strings one per line.",
        )
        RemoveProgramRecords = define.boolean(
            name="Remove Program Records",
            description="[-rpr]Should we override the Walker's default and remove program records from the SAM header",
            default=False,
        )
        Tag = define.string(
            name="Tag",
            description="[-tag]Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis",
        )
        # As with DownsamplingType, "null" is a selectable value and the
        # default.
        Unsafe = define.enum(
            name="Unsafe",
            description="[-U]If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument.",
            values=[
                ('ALLOW_UNINDEXED_BAM', 'ALLOW_UNINDEXED_BAM', ''),
                ('ALLOW_UNSET_BAM_SORT_ORDER', 'ALLOW_UNSET_BAM_SORT_ORDER',
                 ''),
                ('NO_READ_ORDER_VERIFICATION', 'NO_READ_ORDER_VERIFICATION',
                 ''),
                ('ALLOW_SEQ_DICT_INCOMPATIBILITY',
                 'ALLOW_SEQ_DICT_INCOMPATIBILITY', ''),
                ('LENIENT_VCF_PROCESSING', 'LENIENT_VCF_PROCESSING', ''),
                ('ALL', 'ALL', ''),
                ('null', 'null', ''),
            ],
            default="null",
        )
        UseLegacyDownsampler = define.boolean(
            name="Use Legacy Downsampler",
            description="Use the legacy downsampling implementation instead of the newer, less-tested implementation",
            default=False,
        )
        UseOriginalQualities = define.boolean(
            name="Use Original Qualities",
            description="[-OQ]If set, use the original base quality scores from the OQ tag when present instead of the standard scores",
            default=False,
        )
        ValidationStrictness = define.enum(
            name="Validation Strictness",
            description="[-S]How strict should we be with validation",
            values=[
                ('SILENT', 'SILENT', ''),
                ('LENIENT', 'LENIENT', ''),
                ('STRICT', 'STRICT', ''),
            ],
            default="SILENT",
        )

        # --- Wrapper-level options -----------------------------------------
        # Disabled grouping option, kept for reference:
        #Groupby= define.enum( name = "Group by", default = "sample", values = [( 'sample_group','sample_group','' ),( 'sample','sample','' ),( 'library','library','' ),( 'platform_unit','platform_unit','' ),( 'chunk','chunk','' ),( 'interval','interval','' )], description = "Inputs will be grouped by selected value from this category. One output will be generated for each group.")
        # NOTE(review): the display name below also carries usage guidance
        # (and a trailing space) that would normally sit in the description --
        # confirm this is intended. It is reproduced unchanged.
        DivideByIntervals = define.boolean(
            name="Divide By Intervals, if true, it assumes '_interval' is set for each input bam file ",
            description="Divide the result by Genomic Intervals",
            default=False,
        )
    class Params(define.Params):

        # Options controlling how several input VCFs are merged. Each
        # description begins with a bracketed tag that appears to name the
        # corresponding long command-line flag.
        AssumeIdenticalSamples = define.boolean(
            name="Assume Identical Samples",
            description="[--assumeIdenticalSamples]If true, assume input VCFs have identical sample sets and disjoint calls",
            default=False,
        )
        FilteredAreUncalled = define.boolean(
            name="Filtered Are Uncalled",
            description="[--filteredAreUncalled]If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF",
            default=False,
        )
        Filteredrecordsmergetype = define.enum(
            name="Filteredrecordsmergetype",
            description="[--filteredrecordsmergetype]Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields",
            values=[
                ('KEEP_IF_ANY_UNFILTERED', 'KEEP_IF_ANY_UNFILTERED', ''),
                ('KEEP_IF_ALL_UNFILTERED', 'KEEP_IF_ALL_UNFILTERED', ''),
                ('KEEP_UNCONDITIONAL', 'KEEP_UNCONDITIONAL', ''),
            ],
            default="KEEP_IF_ANY_UNFILTERED",
        )
        # "null" is offered as an explicit choice and is the default.
        Genotypemergeoption = define.enum(
            name="Genotypemergeoption",
            description="[--genotypemergeoption] Determines how we should merge genotype records for samples shared across the ROD files",
            values=[
                ('UNIQUIFY', 'UNIQUIFY', ''),
                ('UNSORTED', 'UNSORTED', ''),
                ('REQUIRE_UNIQUE', 'REQUIRE_UNIQUE', ''),
                ('null', 'null', ''),
            ],
            default="null",
        )
        MergeInfoWithMaxAc = define.boolean(
            name="Merge Info With Max Ac",
            description="[--mergeInfoWithMaxAC] If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.",
            default=False,
        )
        MinimalVcf = define.boolean(
            name="Minimal Vcf",
            description="[--minimalVCF] If true, then the output VCF will contain no INFO or genotype FORMAT fields",
            default=False,
        )
        MinimumN = define.integer(
            name="Minimum N",
            default=1,
            description=
            "[--minimumN]Combine variants and output site only if the variant is present in at least N input files."
        )
        PrintComplexMerges = define.boolean(
            name="Print Complex Merges",
            default=False,
            description=
            "[--printComplexMerges]Print out interesting sites requiring complex compatibility merging"
        )
        SetKey = define.string(
            name="Set Key",
            default="set",
            description=
            "[--setKey]Key used in the INFO key=value tag emitted describing which set the combined VCF record came from"
        )
        SuppressCommandLineHeader = define.boolean(
            name="Suppress Command Line Header",
            default=False,
            description=
            "[--suppressCommandLineHeader] If true, do not output the header containing the command line used"
        )

        DisableRandomization = define.boolean(
            name="Disable Randomization",
            default=False,
            description=
            "[-ndrs]Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator."
        )
        AllowPotentiallyMisencodedQuals = define.boolean(
            name="Allow Potentially Misencoded Quals",
            default=False,
            description=
            "[-allowPotentiallyMisencodedQuals] Do not fail when encountered base qualities that are too high and seemingly indicate a problem with the base quality encoding of the BAM file."
        )
        BAQCalculationType = define.enum(
            name="BAQ Calculation Type",
            default="OFF",
            values=[('OFF', 'OFF', ''),
                    ('CALCULATE_AS_NECESSARY', 'CALCULATE_AS_NECESSARY', ''),
                    ('RECALCULATE', 'RECALCULATE', '')],
            description="[-baq]Type of BAQ calculation to apply in the engine."
        )
        BAQGapOpenPenalty = define.real(
            name="BAQ Gap Open Penalty",
            default=40,
            description=
            "[-baqGOP]BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets"
        )
        DefaultBaseQualities = define.integer(
            name="Default Base Qualities",
            default=-1,
            description=
            "If reads are missing some or all base quality scores, this value will be used for all base quality scores"
        )
        DisableIndelQuals = define.boolean(
            name="Disable Indel Quals",
            default=False,
            description=
            "[-DBQ]If 'true', disables printing of base insertion and base deletion tags (with -BQSR). Turns off printing of the base insertion and base deletion tags when using the -BQSR argument and only the base substitution qualities will be produced."
        )
        DownsampletoCoverage = define.integer(
            name="Downsample to Coverage",
            description=
            "[-dcov]Coverage to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position."
        )
        DownsampletoFraction = define.real(
            name="Downsample to Fraction",
            description="[-dfrac]Fraction [0.0-1.0] of reads to downsample to")
        DownsamplingType = define.enum(
            name="Downsampling Type",
            default="null",
            values=[('NONE', 'NONE', ''), ('ALL_READS', 'ALL_READS', ''),
                    ('BY_SAMPLE', 'BY_SAMPLE', ''), ('null', 'null', '')],
            description=
            "[-dt]Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here"
        )
        EmitOriginalQuals = define.boolean(
            name="Emit Original Quals",
            default=False,
            description=
            "[-EOQ]If true, enables printing of the OQ tag with the original base qualities (with -BQSR)"
        )
        FixMisencodedQuals = define.boolean(
            name="Fix Misencoded Quals",
            default=False,
            description=
            "[-fixMisencodedQuals]Fix mis-encoded base quality scores")
        IntervalMerging = define.enum(
            name="Interval Merging",
            default="ALL",
            values=[('ALL', 'ALL', ''),
                    ('OVERLAPPING_ONLY', 'OVERLAPPING_ONLY', '')],
            description=
            "[-im]Indicates the interval merging rule we should use for abutting intervals"
        )
        IntervalPadding = define.integer(
            name="Interval Padding",
            default=0,
            description=
            "[-ip]Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument"
        )
        IntervalSetRule = define.enum(
            name="Interval Set Rule",
            default="UNION",
            values=[('UNION', 'UNION', ''),
                    ('INTERSECTION', 'INTERSECTION', '')],
            description=
            "[-isr]Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs"
        )
        KeepProgramRecords = define.boolean(
            name="Keep Program Records",
            default=False,
            description=
            "[-kpr]Should we override the Walker's default and keep program records from the SAM header"
        )
        MaxRuntime = define.integer(
            name="Max Runtime",
            default=-1,
            description=
            "[-maxRuntime]If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure.  By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits"
        )
        MaxRuntimeUnits = define.enum(
            name="Max Runtime Units",
            default="MINUTES",
            values=[('NANOSECONDS', 'NANOSECONDS', ''),
                    ('MICROSECONDS', 'MICROSECONDS', ''),
                    ('MILLISECONDS', 'MILLISECONDS', ''),
                    ('SECONDS', 'SECONDS', ''), ('MINUTES', 'MINUTES', ''),
                    ('HOURS', 'HOURS', ''), ('DAYS', 'DAYS', '')],
            description="[-maxRuntimeUnits] The TimeUnit for maxRuntime")
        NonDeterministicRandomSeed = define.boolean(
            name="Non Deterministic Random Seed",
            default=False,
            description=
            "[-ndrs]Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run"
        )
        PedigreeString = define.string(
            name="Pedigree String",
            description="[-pedString]Pedigree string for samples")
        PedigreeValidationType = define.enum(
            name="Pedigree Validation Type",
            default="STRICT",
            values=[('STRICT', 'STRICT', ''), ('SILENT', 'SILENT', '')],
            description=
            "[-pedValidationType]How strict should we be in validating the pedigree information?"
        )
        PhoneHome = define.enum(
            name="Phone Home",
            default="STANDARD",
            values=[('NO_ET', 'NO_ET', ''), ('STANDARD', 'STANDARD', '')],
            description=
            "[-et]What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest for details."
        )
        PreserveQscoresLessThan = define.integer(
            name="Preserve Qscores Less Than",
            default=6,
            description=
            "[-preserveQ]Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)"
        )
        ReadFilter = define.string(
            name="Read Filter",
            default='BadCigar',
            description=
            "[-rf]Specify filtration criteria to apply to each read individually"
        )
        ReadGroupBlackList = define.string(
            name="Read Group Black List",
            description=
            "[-rgbl]Filters out read groups matching : or a .txt file containing the filter strings one per line."
        )
        RemoveProgramRecords = define.boolean(
            name="Remove Program Records",
            default=False,
            description=
            "[-rpr]Should we override the Walker's default and remove program records from the SAM header"
        )
        Tag = define.string(
            name="Tag",
            description=
            "[-tag]Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis"
        )
        Unsafe = define.enum(
            name="Unsafe",
            default="null",
            values=[('ALLOW_UNINDEXED_BAM', 'ALLOW_UNINDEXED_BAM', ''),
                    ('ALLOW_UNSET_BAM_SORT_ORDER',
                     'ALLOW_UNSET_BAM_SORT_ORDER', ''),
                    ('NO_READ_ORDER_VERIFICATION',
                     'NO_READ_ORDER_VERIFICATION', ''),
                    ('ALLOW_SEQ_DICT_INCOMPATIBILITY',
                     'ALLOW_SEQ_DICT_INCOMPATIBILITY', ''),
                    ('LENIENT_VCF_PROCESSING', 'LENIENT_VCF_PROCESSING', ''),
                    ('ALL', 'ALL', ''), ('null', 'null', '')],
            description=
            "[-U]If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument."
        )
        UseLegacyDownsampler = define.boolean(
            name="Use Legacy Downsampler",
            default=False,
            description=
            "Use the legacy downsampling implementation instead of the newer, less-tested implementation"
        )
        UseOriginalQualities = define.boolean(
            name="Use Original Qualities",
            default=False,
            description=
            "[-OQ]If set, use the original base quality scores from the OQ tag when present instead of the standard scores"
        )
        ValidationStrictness = define.enum(
            name="Validation Strictness",
            default="SILENT",
            values=[('SILENT', 'SILENT', ''), ('LENIENT', 'LENIENT', ''),
                    ('STRICT', 'STRICT', '')],
            description="[-S]How strict should we be with validation")
# Example #10
    class Params(define.Params):
        # Parameter declarations for a GATK wrapper that filters variant calls
        # (the tool-specific flags look like GATK VariantFiltration -- TODO
        # confirm against the enclosing wrapper, which is not visible here).
        #
        # Every attribute is a `define.*` field with a display name, an optional
        # default and a description.  Where known, the description starts with
        # the corresponding GATK command-line flag in square brackets.
        #
        # NOTE(review): enum defaults of "null" are the literal string "null",
        # which is also listed in `values`; presumably the wrapper treats it as
        # "option not set" -- verify against the command-line builder.

        # --- Tool-specific options -------------------------------------------
        ClusterSize = define.integer(
            name="Cluster Size",
            default=3,
            description="[-cluster]The number of SNPs which make up a cluster")
        ClusterWindowSize = define.integer(
            name="Cluster Window Size",
            default=0,
            description=
            "[-window]The window size (in bases) in which to evaluate clustered SNPs"
        )
        # The four filter fields below are plain strings; per their
        # descriptions, multiple entries are supplied comma-separated.
        FiltersName = define.string(
            name="FilterNames",
            description=
            "[--filterName] Names to use for the list of filters, divide them using comma"
        )
        Filters = define.string(
            name="Filters",
            description=
            "[-filter] One or more expression used with INFO fields to filter, divide them using comma"
        )
        GenotypefiltersName = define.string(
            name="Genotype filters Names",
            description=
            "[--genotypeFilterName] Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered, divide them using comma"
        )
        Genotypefilters = define.string(
            name="Genotype filters",
            description=
            "[-G_filter] One or more expression used with FORMAT (sample/genotype-level) fields to filter (see documentation guide for more info), divide them using comma"
        )

        InvalidatePreviousFilters = define.boolean(
            name="Invalidate Previous Filters",
            default=False,
            description=
            "[--invalidatePreviousFilters]Remove previous filters applied to the VCF"
        )
        MaskExtension = define.integer(
            name="Mask Extension",
            default=0,
            description=
            "[-maskExtend]How many bases beyond records from a provided 'mask' rod should variants be filtered"
        )
        MaskName = define.string(
            name="Mask Name",
            default="Mask",
            description=
            "[--maskName]The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call"
        )
        MissingValuesInExpressionsShouldEvaluateAsFailing = define.boolean(
            name="Missing Values In Expressions Should Evaluate As Failing",
            default=False,
            description=
            "[--missingValuesInExpressionsShouldEvaluateAsFailing] When evaluating the JEXL expressions, missing values should be considered failing the expression"
        )

        # --- Engine-level options (the same set is repeated in the sibling
        # --- Params classes of this file) ------------------------------------
        # Flag tag corrected: "-ndrs" is the flag of NonDeterministicRandomSeed
        # (declared further down), not of this option.
        DisableRandomization = define.boolean(
            name="Disable Randomization",
            default=False,
            description=
            "[--disableRandomization]Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator."
        )
        AllowPotentiallyMisencodedQuals = define.boolean(
            name="Allow Potentially Misencoded Quals",
            default=False,
            description=
            "[-allowPotentiallyMisencodedQuals] Do not fail when encountered base qualities that are too high and seemingly indicate a problem with the base quality encoding of the BAM file."
        )
        BAQCalculationType = define.enum(
            name="BAQ Calculation Type",
            default="OFF",
            values=[('OFF', 'OFF', ''),
                    ('CALCULATE_AS_NECESSARY', 'CALCULATE_AS_NECESSARY', ''),
                    ('RECALCULATE', 'RECALCULATE', '')],
            description="[-baq]Type of BAQ calculation to apply in the engine."
        )
        BAQGapOpenPenalty = define.real(
            name="BAQ Gap Open Penalty",
            default=40,
            description=
            "[-baqGOP]BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets"
        )
        # Flag tags corrected: -DBQ is the default-base-qualities flag; it was
        # previously attached to DisableIndelQuals, whose flag is -DIQ.
        DefaultBaseQualities = define.integer(
            name="Default Base Qualities",
            default=-1,
            description=
            "[-DBQ]If reads are missing some or all base quality scores, this value will be used for all base quality scores"
        )
        DisableIndelQuals = define.boolean(
            name="Disable Indel Quals",
            default=False,
            description=
            "[-DIQ]If 'true', disables printing of base insertion and base deletion tags (with -BQSR). Turns off printing of the base insertion and base deletion tags when using the -BQSR argument and only the base substitution qualities will be produced."
        )
        # No default is declared for the two numeric downsampling options.
        DownsampletoCoverage = define.integer(
            name="Downsample to Coverage",
            description=
            "[-dcov]Coverage to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position."
        )
        DownsampletoFraction = define.real(
            name="Downsample to Fraction",
            description="[-dfrac]Fraction [0.0-1.0] of reads to downsample to")
        DownsamplingType = define.enum(
            name="Downsampling Type",
            default="null",
            values=[('NONE', 'NONE', ''), ('ALL_READS', 'ALL_READS', ''),
                    ('BY_SAMPLE', 'BY_SAMPLE', ''), ('null', 'null', '')],
            description=
            "[-dt]Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here"
        )
        EmitOriginalQuals = define.boolean(
            name="Emit Original Quals",
            default=False,
            description=
            "[-EOQ]If true, enables printing of the OQ tag with the original base qualities (with -BQSR)"
        )
        FixMisencodedQuals = define.boolean(
            name="Fix Misencoded Quals",
            default=False,
            description=
            "[-fixMisencodedQuals]Fix mis-encoded base quality scores")
        IntervalMerging = define.enum(
            name="Interval Merging",
            default="ALL",
            values=[('ALL', 'ALL', ''),
                    ('OVERLAPPING_ONLY', 'OVERLAPPING_ONLY', '')],
            description=
            "[-im]Indicates the interval merging rule we should use for abutting intervals"
        )
        IntervalPadding = define.integer(
            name="Interval Padding",
            default=0,
            description=
            "[-ip]Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument"
        )
        IntervalSetRule = define.enum(
            name="Interval Set Rule",
            default="UNION",
            values=[('UNION', 'UNION', ''),
                    ('INTERSECTION', 'INTERSECTION', '')],
            description=
            "[-isr]Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs"
        )
        KeepProgramRecords = define.boolean(
            name="Keep Program Records",
            default=False,
            description=
            "[-kpr]Should we override the Walker's default and keep program records from the SAM header"
        )
        MaxRuntime = define.integer(
            name="Max Runtime",
            default=-1,
            description=
            "[-maxRuntime]If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure.  By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits"
        )
        MaxRuntimeUnits = define.enum(
            name="Max Runtime Units",
            default="MINUTES",
            values=[('NANOSECONDS', 'NANOSECONDS', ''),
                    ('MICROSECONDS', 'MICROSECONDS', ''),
                    ('MILLISECONDS', 'MILLISECONDS', ''),
                    ('SECONDS', 'SECONDS', ''), ('MINUTES', 'MINUTES', ''),
                    ('HOURS', 'HOURS', ''), ('DAYS', 'DAYS', '')],
            description="[-maxRuntimeUnits] The TimeUnit for maxRuntime")
        NonDeterministicRandomSeed = define.boolean(
            name="Non Deterministic Random Seed",
            default=False,
            description=
            "[-ndrs]Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run"
        )
        PedigreeString = define.string(
            name="Pedigree String",
            description="[-pedString]Pedigree string for samples")
        PedigreeValidationType = define.enum(
            name="Pedigree Validation Type",
            default="STRICT",
            values=[('STRICT', 'STRICT', ''), ('SILENT', 'SILENT', '')],
            description=
            "[-pedValidationType]How strict should we be in validating the pedigree information?"
        )
        PhoneHome = define.enum(
            name="Phone Home",
            default="STANDARD",
            values=[('NO_ET', 'NO_ET', ''), ('STANDARD', 'STANDARD', '')],
            description=
            "[-et]What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest for details."
        )
        PreserveQscoresLessThan = define.integer(
            name="Preserve Qscores Less Than",
            default=6,
            description=
            "[-preserveQ]Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)"
        )
        ReadFilter = define.string(
            name="Read Filter",
            default='BadCigar',
            description=
            "[-rf]Specify filtration criteria to apply to each read individually"
        )
        # Description repaired: the "<TAG>:<STRING>" placeholder had been lost,
        # leaving the meaningless "matching : or".
        ReadGroupBlackList = define.string(
            name="Read Group Black List",
            description=
            "[-rgbl]Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line."
        )
        RemoveProgramRecords = define.boolean(
            name="Remove Program Records",
            default=False,
            description=
            "[-rpr]Should we override the Walker's default and remove program records from the SAM header"
        )
        Tag = define.string(
            name="Tag",
            description=
            "[-tag]Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis"
        )
        Unsafe = define.enum(
            name="Unsafe",
            default="null",
            values=[('ALLOW_UNINDEXED_BAM', 'ALLOW_UNINDEXED_BAM', ''),
                    ('ALLOW_UNSET_BAM_SORT_ORDER',
                     'ALLOW_UNSET_BAM_SORT_ORDER', ''),
                    ('NO_READ_ORDER_VERIFICATION',
                     'NO_READ_ORDER_VERIFICATION', ''),
                    ('ALLOW_SEQ_DICT_INCOMPATIBILITY',
                     'ALLOW_SEQ_DICT_INCOMPATIBILITY', ''),
                    ('LENIENT_VCF_PROCESSING', 'LENIENT_VCF_PROCESSING', ''),
                    ('ALL', 'ALL', ''), ('null', 'null', '')],
            description=
            "[-U]If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument."
        )
        UseLegacyDownsampler = define.boolean(
            name="Use Legacy Downsampler",
            default=False,
            description=
            "Use the legacy downsampling implementation instead of the newer, less-tested implementation"
        )
        UseOriginalQualities = define.boolean(
            name="Use Original Qualities",
            default=False,
            description=
            "[-OQ]If set, use the original base quality scores from the OQ tag when present instead of the standard scores"
        )
        ValidationStrictness = define.enum(
            name="Validation Strictness",
            default="SILENT",
            values=[('SILENT', 'SILENT', ''), ('LENIENT', 'LENIENT', ''),
                    ('STRICT', 'STRICT', '')],
            description="[-S]How strict should we be with validation")
    class Params(define.Params):
        # --- Recalibration-specific options ----------------------------------
        # Each field is a `define.*` declaration whose description starts with
        # the corresponding command-line flag in square brackets.
        BinaryTagName = define.string(
            name="Binary Tag Name",
            description="[-bintag] the binary tag covariate name if using it")
        # NOTE(review): `values` is an empty list, so no choices are declared
        # here; presumably they are supplied elsewhere -- confirm.
        Covariate = define.enum(
            name="Covariate",
            values=[],
            description=
            "[-cov]One or more covariates to be used in the recalibration. Can be specified multiple times"
        )
        DeletionsDefaultQuality = define.integer(
            name="Deletions Default Quality",
            default=45,
            description=
            "[-ddq] default quality for the base deletions covariate")
        IndelsContextSize = define.integer(
            name="Indels Context Size",
            default=3,
            description=
            "[-ics] size of the k-mer context to be used for base insertions and deletions"
        )
        # Unlike most booleans in this class, this one defaults to True.
        Disableindelquals = define.boolean(
            name="Disable indel quals",
            default=True,
            description=
            "[--disable_indel_quals] Disable indel quality recalibration. Must be set to true in GATK Lite."
        )
        InsertionsDefaultQuality = define.integer(
            name="Insertions Default Quality",
            default=45,
            description=
            "[-idq] default quality for the base insertions covariate")
        LowQualityTail = define.integer(
            name="Low Quality Tail",
            default=2,
            description=
            "[-lqt] minimum quality for the bases in the tail of the reads to be considered"
        )
        # Stray space inside the flag tag removed ("[-maxCycle ]").
        MaximumCycleValue = define.integer(
            name="Maximum Cycle Value",
            default=500,
            description=
            "[-maxCycle] the maximum cycle value permitted for the Cycle covariate"
        )
        # Flag tag corrected: "-maxCycle" belongs to MaximumCycleValue above;
        # the mismatches context size flag is -mcs (compare -ics for indels).
        MismatchesContextSize = define.integer(
            name="Mismatches Context Size",
            default=2,
            description=
            "[-mcs]size of the k-mer context to be used for base mismatches"
        )
        MismatchesDefaultQuality = define.integer(
            name="Mismatches Default Quality",
            default=-1,
            description=
            "[-mdq]default quality for the base mismatches covariate")
        NoStandardCovs = define.boolean(
            name="No Standard Covs",
            default=False,
            description=
            "[-noStandard]Do not use the standard set of covariates, but rather just the ones listed using the -cov argument. Cannot be used if grouped by interval."
        )
        QuantizingLevels = define.integer(
            name="Quantizing Levels",
            default=16,
            description=
            "[-ql] number of distinct quality scores in the quantized output")
        SolidNocallStrategy = define.enum(
            name="Solid Nocall Strategy",
            default="THROW_EXCEPTION",
            values=[('THROW_EXCEPTION', 'THROW_EXCEPTION', ''),
                    ('LEAVE_READ_UNRECALIBRATED', 'LEAVE_READ_UNRECALIBRATED',
                     ''), ('PURGE_READ', 'PURGE_READ', '')],
            description=
            "[--solid_nocall_strategy]Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ"
        )
        SolidRecalMode = define.enum(
            name="Solid Recal Mode",
            default="SET_Q_ZERO",
            values=[('DO_NOTHING', 'DO_NOTHING', ''),
                    ('SET_Q_ZERO', 'SET_Q_ZERO', ''),
                    ('SET_Q_ZERO_BASE_N', 'SET_Q_ZERO_BASE_N', ''),
                    ('REMOVE_REF_BIAS', 'REMOVE_REF_BIAS', '')],
            description=
            "[-sMode]How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS"
        )
        BqsrBaqGapOpenPenalty = define.real(
            name="Bqsr Baq Gap Open Penalty",
            default=40,
            description=
            "[-bqsrBAQGOP]BQSR BAQ gap open penalty (Phred Scaled).  Default value is 40.  30 is perhaps better for whole genome call sets"
        )
        RunWithoutDbsnpPotentiallyRuiningQuality = define.boolean(
            name="Run Without Dbsnp Potentially Ruining Quality",
            default=False,
            description=
            "[-run_without_dbsnp_potentially_ruining_quality] If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only."
        )

        DisableRandomization = define.boolean(
            name="Disable Randomization",
            default=False,
            description=
            "[-ndrs]Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator."
        )
        AllowPotentiallyMisencodedQuals = define.boolean(
            name="Allow Potentially Misencoded Quals",
            default=False,
            description=
            "[-allowPotentiallyMisencodedQuals] Do not fail when encountered base qualities that are too high and seemingly indicate a problem with the base quality encoding of the BAM file."
        )
        BAQCalculationType = define.enum(
            name="BAQ Calculation Type",
            default="OFF",
            values=[('OFF', 'OFF', ''),
                    ('CALCULATE_AS_NECESSARY', 'CALCULATE_AS_NECESSARY', ''),
                    ('RECALCULATE', 'RECALCULATE', '')],
            description="[-baq]Type of BAQ calculation to apply in the engine."
        )
        BAQGapOpenPenalty = define.real(
            name="BAQ Gap Open Penalty",
            default=40,
            description=
            "[-baqGOP]BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets"
        )
        DefaultBaseQualities = define.integer(
            name="Default Base Qualities",
            default=-1,
            description=
            "If reads are missing some or all base quality scores, this value will be used for all base quality scores"
        )
        DisableIndelQuals = define.boolean(
            name="Disable Indel Quals",
            default=False,
            description=
            "[-DBQ]If 'true', disables printing of base insertion and base deletion tags (with -BQSR). Turns off printing of the base insertion and base deletion tags when using the -BQSR argument and only the base substitution qualities will be produced."
        )
        # Downsampling controls: target coverage (-dcov), target fraction
        # (-dfrac) and the downsampling method (-dt).
        DownsampletoCoverage = define.integer(
            name="Downsample to Coverage",
            description=(
                "[-dcov]Coverage to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position."
            ),
        )
        DownsampletoFraction = define.real(
            name="Downsample to Fraction",
            description=(
                "[-dfrac]Fraction [0.0-1.0] of reads to downsample to"
            ),
        )
        # "null" is offered as an explicit choice and is also the default.
        DownsamplingType = define.enum(
            name="Downsampling Type",
            default="null",
            values=[
                ("NONE", "NONE", ""),
                ("ALL_READS", "ALL_READS", ""),
                ("BY_SAMPLE", "BY_SAMPLE", ""),
                ("null", "null", ""),
            ],
            description=(
                "[-dt]Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here"
            ),
        )
        # Base-quality switches: emit the OQ tag (-EOQ) and repair mis-encoded
        # quality scores (-fixMisencodedQuals). Both default to False.
        EmitOriginalQuals = define.boolean(
            name="Emit Original Quals",
            default=False,
            description=(
                "[-EOQ]If true, enables printing of the OQ tag with the original base qualities (with -BQSR)"
            ),
        )
        FixMisencodedQuals = define.boolean(
            name="Fix Misencoded Quals",
            default=False,
            description=(
                "[-fixMisencodedQuals]Fix mis-encoded base quality scores"
            ),
        )
        # Interval handling: merging rule (-im), padding (-ip) and the rule
        # for combining interval sets (-isr).
        IntervalMerging = define.enum(
            name="Interval Merging",
            default="ALL",
            values=[
                ("ALL", "ALL", ""),
                ("OVERLAPPING_ONLY", "OVERLAPPING_ONLY", ""),
            ],
            description=(
                "[-im]Indicates the interval merging rule we should use for abutting intervals"
            ),
        )
        IntervalPadding = define.integer(
            name="Interval Padding",
            default=0,
            description=(
                "[-ip]Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument"
            ),
        )
        IntervalSetRule = define.enum(
            name="Interval Set Rule",
            default="UNION",
            values=[
                ("UNION", "UNION", ""),
                ("INTERSECTION", "INTERSECTION", ""),
            ],
            description=(
                "[-isr]Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs"
            ),
        )
        # Program-record retention (-kpr) followed by the run-time limit and
        # its unit (-maxRuntime / -maxRuntimeUnits).
        KeepProgramRecords = define.boolean(
            name="Keep Program Records",
            default=False,
            description=(
                "[-kpr]Should we override the Walker's default and keep program records from the SAM header"
            ),
        )
        MaxRuntime = define.integer(
            name="Max Runtime",
            default=-1,
            description=(
                "[-maxRuntime]If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure.  By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits"
            ),
        )
        MaxRuntimeUnits = define.enum(
            name="Max Runtime Units",
            default="MINUTES",
            values=[
                ("NANOSECONDS", "NANOSECONDS", ""),
                ("MICROSECONDS", "MICROSECONDS", ""),
                ("MILLISECONDS", "MILLISECONDS", ""),
                ("SECONDS", "SECONDS", ""),
                ("MINUTES", "MINUTES", ""),
                ("HOURS", "HOURS", ""),
                ("DAYS", "DAYS", ""),
            ],
            description="[-maxRuntimeUnits] The TimeUnit for maxRuntime",
        )
        # Random-seed behaviour (-ndrs) and pedigree inputs
        # (-pedString / -pedValidationType).
        NonDeterministicRandomSeed = define.boolean(
            name="Non Deterministic Random Seed",
            default=False,
            description=(
                "[-ndrs]Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run"
            ),
        )
        PedigreeString = define.string(
            name="Pedigree String",
            description="[-pedString]Pedigree string for samples",
        )
        PedigreeValidationType = define.enum(
            name="Pedigree Validation Type",
            default="STRICT",
            values=[
                ("STRICT", "STRICT", ""),
                ("SILENT", "SILENT", ""),
            ],
            description=(
                "[-pedValidationType]How strict should we be in validating the pedigree information?"
            ),
        )
        # Run reporting (-et), the recalibration quality floor (-preserveQ)
        # and the per-read filter (-rf).
        PhoneHome = define.enum(
            name="Phone Home",
            default="STANDARD",
            values=[
                ("NO_ET", "NO_ET", ""),
                ("STANDARD", "STANDARD", ""),
            ],
            description=(
                "[-et]What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest for details."
            ),
        )
        PreserveQscoresLessThan = define.integer(
            name="Preserve Qscores Less Than",
            default=6,
            description=(
                "[-preserveQ]Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)"
            ),
        )
        ReadFilter = define.string(
            name="Read Filter",
            default="BadCigar",
            description=(
                "[-rf]Specify filtration criteria to apply to each read individually"
            ),
        )
        # Read-group exclusion list (-rgbl). The description restores the
        # <TAG>:<STRING> placeholders from the GATK help text; without them
        # the sentence read "matching : or a .txt file", which told the user
        # nothing about the expected filter format.
        ReadGroupBlackList = define.string(
            name="Read Group Black List",
            description=
            "[-rgbl]Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line."
        )
        # Program-record removal (-rpr), a free-form run tag (-tag) and the
        # unsafe-operation selector (-U).
        RemoveProgramRecords = define.boolean(
            name="Remove Program Records",
            default=False,
            description=(
                "[-rpr]Should we override the Walker's default and remove program records from the SAM header"
            ),
        )
        Tag = define.string(
            name="Tag",
            description=(
                "[-tag]Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis"
            ),
        )
        # "null" is offered as an explicit choice and is also the default.
        Unsafe = define.enum(
            name="Unsafe",
            default="null",
            values=[
                ("ALLOW_UNINDEXED_BAM", "ALLOW_UNINDEXED_BAM", ""),
                ("ALLOW_UNSET_BAM_SORT_ORDER", "ALLOW_UNSET_BAM_SORT_ORDER", ""),
                ("NO_READ_ORDER_VERIFICATION", "NO_READ_ORDER_VERIFICATION", ""),
                (
                    "ALLOW_SEQ_DICT_INCOMPATIBILITY",
                    "ALLOW_SEQ_DICT_INCOMPATIBILITY",
                    "",
                ),
                ("LENIENT_VCF_PROCESSING", "LENIENT_VCF_PROCESSING", ""),
                ("ALL", "ALL", ""),
                ("null", "null", ""),
            ],
            description=(
                "[-U]If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument."
            ),
        )
        # Legacy downsampler toggle, OQ-tag usage (-OQ) and validation
        # strictness (-S).
        UseLegacyDownsampler = define.boolean(
            name="Use Legacy Downsampler",
            default=False,
            description=(
                "Use the legacy downsampling implementation instead of the newer, less-tested implementation"
            ),
        )
        UseOriginalQualities = define.boolean(
            name="Use Original Qualities",
            default=False,
            description=(
                "[-OQ]If set, use the original base quality scores from the OQ tag when present instead of the standard scores"
            ),
        )
        ValidationStrictness = define.enum(
            name="Validation Strictness",
            default="SILENT",
            values=[
                ("SILENT", "SILENT", ""),
                ("LENIENT", "LENIENT", ""),
                ("STRICT", "STRICT", ""),
            ],
            description="[-S]How strict should we be with validation",
        )
Example #12
0
 class Params(define.Params):
     # Adapter matching thresholds (-s, -t, -m, -p), the "zero all defaults"
     # switch (-0) and the remaining-length limits (-l, -L).
     Minimumlengthmatch = define.real(
         name="Minimum length match",
         default=2.2,
         description="[-s]: Log scale for adapter minimum-length-match (2.2)",
     )
     Adapteroccurrencethreshold = define.real(
         name="Adapter occurrence threshold",
         default=0.25,
         description="[-t]: % occurrence threshold before adapter clipping (0.25)",
     )
     Mincliplength = define.integer(
         name="Min clip length",
         default=1,
         description="[-m]: Minimum clip length, overrides scaled auto (1)",
     )
     Maxadapterdifference = define.integer(
         name="Max adapter difference",
         default=10,
         description="[-p]: Maximum adapter difference percentage (10)",
     )
     Setalldefaultparameterstozerodonothing = define.boolean(
         name="Set all default parameters to zero/do nothing",
         default=False,
         description="[-0]: default False",
     )
     Minremainingsequencelength = define.integer(
         name="Min remaining sequence length",
         default=19,
         description="[-l]: Minimum remaining sequence length (19)",
     )
     # No default is declared for the maximum length.
     Maxremainingsequencelength = define.integer(
         name="Max remaining sequence length",
         description="[-L]: Maximum remaining sequence length",
     )
     # Duplicate, cycle and quality filtering (-D, -k, -x, -q, -w, -H).
     Removeduplicatereads = define.integer(
         name="Remove duplicate reads",
         default=0,
         description="[-D]: Read_1 has an identical N bases (0)",
     )
     sKewPercentage = define.integer(
         name="sKew Percentage",
         default=2,
         description=(
             "[-k]: If any nucleotide is less than the skew percentage, then the whole cycle is removed (2). Set the skew (-k) or N-pct (-x) to 0 to turn it off, this should be done for miRNA, amplicon and other low-complexity situations."
         ),
     )
     Badreadpercentagethreshold = define.integer(
         name="Bad read percentage threshold",
         default=20,
         description=(
             "[-x]: 'N' (Bad read) percentage causing cycle removal from ALL read (20). Set the skew (-k) or N-pct (-x) to 0 to turn it off, this should be done for miRNA, amplicon and other low-complexity situations."
         ),
     )
     Qualitythreshold = define.integer(
         name="Quality threshold",
         default=7,
         description="[-q]: Quality threshold causing base removal (7)",
     )
     Trimmingwindowsize = define.integer(
         name="Trimming window size",
         default=1,
         description="[-w]: Window-size for quality trimming (1)",
     )
     Removehomopolymerreads = define.boolean(
         name="Remove homopolymer reads",
         default=False,
         description="[-H]: Remove >95% homopolymer reads",
     )
     # Behaviour flags and sampling settings (-U|u, -R, -C, -P, -n, -K, -S).
     IlluminaPF = define.boolean(
         name="Illumina PF",
         default=False,
         description=(
             "[-U|u]: Force disable/enable Illumina PF filtering. Values are -u: disable (default); -U: enable"
         ),
     )
     DonttrimNs = define.boolean(
         name="Don't trim N's",
         default=False,
         description="[-R]: Don't remove N's from the fronts/ends of reads",
     )
     Subsampling = define.integer(
         name="Subsampling",
         default=300000,
         description="[-C]: Number of reads to use for subsampling (300k)",
     )
     # No default is declared for the Phred scale.
     Phredscale = define.integer(
         name="Phred-scale",
         description="[-P]: Phred-scale (auto-determined)",
     )
     Dontclip = define.boolean(
         name="Don't clip",
         default=False,
         description="[-n]: Just output what would be done",
     )
     Onlykeepclippedreads = define.boolean(
         name="Only keep clipped reads",
         default=False,
         description="[-K]: Only keep clipped reads",
     )
     Saveskippedreads = define.boolean(
         name="Save skipped reads",
         default=False,
         description=(
             "[-S]: Output FASTQ files skipped reads on the 'Skipped Reads' output."
         ),
     )
     # Long-option filters. None declares a default. Each "--mate-" variant
     # is named as applying to the second non-barcode read only.
     Minimummeanqualityscore = define.real(
         name="Minimum mean quality score",
         description="[--qual-mean]: Evaluated after clipping/trimming",
     )
     Minimummeanqualityscoreappliestosecondnonbarcodereadonly = define.real(
         name="Minimum mean quality score, applies to second non-barcode read only",
         description="[--mate-qual-mean]: Evaluated after clipping/trimming",
     )
     Qualitygreaterthanthreshold = define.string(
         name="Quality greater than threshold",
         description=(
             "[--qual-gt NUM,THR]: Evaluated after clipping/trimming, At least NUM quals > THR"
         ),
     )
     Qualitygreaterthanthresholdappliestosecondnonbarcodereadonly = define.string(
         name="Quality greater than threshold, applies to second non-barcode read only",
         description=(
             "[--mate-qual-gt NUM,THR]:Evaluated after clipping/trimming, At least NUM quals > THR"
         ),
     )
     MaximumNcallsinareadcanbea = define.real(
         name="Maximum N-calls in a read (can be a %)",
         description="[--max-ns]: Evaluated after clipping/trimming",
     )
     MaximumNcallsinareadcanbeaappliestosecondnonbarcodereadonly = define.real(
         name="Maximum N-calls in a read (can be a %), applies to second non-barcode read only",
         description="[--mate-max-ns]: Evaluated after clipping/trimming",
     )
     Homopolymerfilterpercentageasnumber = define.integer(
         name="Homopolymer filter percentage, as number",
         description=(
             "[--homopolymer-pct]: Homopolymer filter percentage, evaluated after clipping/trimming"
         ),
     )
     Complexityfilterpercent = define.integer(
         name="Complexity filter percent",
         description="[--lowcomplex-pct]: Complexity filter percent (95)",
     )
     # Quality adjustment options; both take a comma-separated pair as a string.
     AdjustcycleCYCnegativeoffsetfromendbyamountAMT = define.string(
         name="Adjust cycle CYC (negative - offset from end) by amount AMT",
         description=(
             "[--cycle-adjust CYC,AMT] Adjust cycle CYC (negative - offset from end) by amount AMT"
         ),
     )
     AdjustscoreSCOREbyamountAMT = define.string(
         name="Adjust score SCORE by amount AMT",
         description="[--phred-adjust SCORE,AMT]: Adjust score SCORE by amount AMT",
     )