Ejemplo n.º 1
0
 def inputs(self) -> List[ToolInput]:
     """Return four string inputs, one per quality setting.

     Each input's tag matches the quality it is documented with; the
     ``"none"`` input is documented with ``quality=None``.
     """
     quality_by_tag = (
         ("user", InputQualityType.user),
         ("static", InputQualityType.static),
         ("configuration", InputQualityType.configuration),
         ("none", None),
     )
     return [
         ToolInput(tag, str, doc=InputDocumentation(None, quality=quality))
         for tag, quality in quality_by_tag
     ]
Ejemplo n.º 2
0
    def add_inputs(self):
        """Declare the normal/tumor read and sample-name inputs.

        After the four user-quality inputs are registered, the reference,
        interval and configuration inputs are added by the dedicated
        ``add_inputs_for_*`` methods.
        """

        def declare_user_input(tag, datatype, description, example):
            # All four inputs below share the "user" quality classification.
            documentation = InputDocumentation(
                description,
                quality=InputQualityType.user,
                example=example,
            )
            self.input(tag, datatype, doc=documentation)

        # INPUTS
        declare_user_input(
            "normal_inputs",
            Array(FastqGzPair),
            "An array of NORMAL FastqGz pairs. These are aligned separately and merged to create higher depth coverages from multiple sets of reads",
            '["normal_R1.fastq.gz", "normal_R2.fastq.gz"]',
        )
        declare_user_input(
            "tumor_inputs",
            Array(FastqGzPair),
            "An array of TUMOR FastqGz pairs. These are aligned separately and merged to create higher depth coverages from multiple sets of reads",
            '["tumor_R1.fastq.gz", "tumor_R2.fastq.gz"]',
        )
        declare_user_input(
            "normal_name",
            String(),
            "Sample name for the NORMAL sample from which to generate the readGroupHeaderLine for BwaMem",
            "NA24385_normal",
        )
        declare_user_input(
            "tumor_name",
            String(),
            "Sample name for the TUMOR sample from which to generate the readGroupHeaderLine for BwaMem",
            "NA24385_tumor",
        )

        self.add_inputs_for_reference()
        self.add_inputs_for_intervals()
        self.add_inputs_for_configuration()
    def constructor(self):
        """Build the workflow: one ``vcf`` input, one step, captured outputs.

        The ``vcf`` input is documented with an empty description and a
        ``source`` URI. It is passed to ``GetSizeTool`` as step ``"stp"``,
        and that step's outputs are captured on the workflow.
        """
        vcf_documentation = InputDocumentation(
            doc="",
            source="gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.tile_db_header.vcf",
        )
        self.input("vcf", VcfTabix, doc=vcf_documentation)

        size_tool = GetSizeTool(vcf=self.vcf)
        size_step = self.step("stp", size_tool)

        self.capture_outputs_from_step(size_step)
Ejemplo n.º 4
0
    def setUpClass(cls):
        """Create a workflow with one string input per quality setting.

        Stores the tag -> quality mapping on ``cls.inmap`` and the built
        workflow on ``cls.wf``.
        """
        workflow = WorkflowBuilder("test_workflow_input_collection")

        quality_by_input = {
            "user": InputQualityType.user,
            "static": InputQualityType.static,
            "configuration": InputQualityType.configuration,
            "none": None,
        }
        cls.inmap = quality_by_input

        # Register every input under its tag, documented with its quality.
        for input_name, quality in quality_by_input.items():
            documentation = InputDocumentation(None, quality=quality)
            workflow.input(input_name, str, doc=documentation)

        cls.wf = workflow
 def add_inputs_for_configuration(self):
     """Add the facets and vardict configuration inputs.

     The parent class's configuration inputs are registered first. All
     inputs added here are optional, except ``allele_freq_threshold``,
     which is a ``Float`` with a default of 0.05.
     """
     super().add_inputs_for_configuration()

     # facets
     facets_inputs = (
         ("pseudo_snps", Int),
         ("max_depth", Int),
         ("everything", Boolean),
         ("genome", String),
         ("cval", Int),
         ("purity_cval", Int),
         ("normal_depth", Int),
     )
     for tag, datatype in facets_inputs:
         self.input(tag, datatype(optional=True))

     # vardict
     threshold_doc = InputDocumentation(
         "The threshold for VarDict's allele frequency, default: 0.05 or 5%",
         quality=InputQualityType.configuration,
     )
     self.input(
         "allele_freq_threshold",
         Float,
         default=0.05,
         doc=threshold_doc,
     )
     for tag, datatype in (("minMappingQual", Int), ("filter", String)):
         self.input(tag, datatype(optional=True))
Ejemplo n.º 6
0
 def inputs(self):
     """Return the parent's inputs followed by this tool's command-line inputs.

     Every input declared here is bound to a ``--long-option`` prefix with
     the value passed as a separate token. The order of the list is
     preserved from the original declaration.

     Type corrections relative to the previous declaration: the following
     arguments take a value (an enum name, an integer, or a string) and were
     wrongly typed as ``Boolean``. They are now ``String`` or ``Int`` so the
     value can actually be supplied:

     * ``String``: excludeIntervals, intervalMergingRule, intervalSetRule,
       readValidationStringency, verbosity, platformFilterName,
       readGroupBlackList
     * ``Int``: intervalPadding, maxReadsInMemory, maxFragmentLength,
       minFragmentLength

     Returns:
         A list of ``ToolInput`` objects.
     """

     def arg(tag, input_type, prefix, doc, **extra):
         # All inputs share the same prefix handling; ``extra`` carries the
         # few per-input options (``default``, ``prefix_applies_to_all_elements``).
         return ToolInput(
             tag=tag,
             input_type=input_type,
             prefix=prefix,
             separate_value_from_prefix=True,
             doc=InputDocumentation(doc=doc),
             **extra,
         )

     return [
         *super().inputs(),
         arg(
             "inp",
             Array(Bam, optional=True),
             "--input",
             "(-I) BAM/SAM/CRAM file containing reads."
             " This argument must be specified at least once. Required. ",
             prefix_applies_to_all_elements=True,
         ),
         arg(
             "outputFilename",
             Filename(extension=".bam"),
             "--output",
             "(-O) Write output to this BAM filename Required.",
         ),
         arg(
             "reference",
             FastaWithIndexes(optional=True),
             "--reference",
             "(-R) Reference sequence file Required.",
         ),
         arg(
             "addOutputSamProgramRecord",
             Boolean(optional=True),
             "--add-output-sam-program-record",
             "(-add-output-sam-program-record)  If true, adds a PG tag to created SAM/BAM/CRAM files.  "
             "Default value: true. Possible values: {true, false} ",
         ),
         arg(
             "addOutputVcfCommandLine",
             Boolean(optional=True),
             "--add-output-vcf-command-line",
             "(-add-output-vcf-command-line)  If true, adds a command line header line to created VCF files."
             "Default value: true. Possible values: {true, false} ",
         ),
         arg(
             "arguments_file",
             File(optional=True),
             "--arguments_file",
             "read one or more arguments files and add them to the command line This argument may be "
             "specified 0 or more times. Default value: null. ",
         ),
         arg(
             "cloudIndexPrefetchBuffer",
             Int(optional=True),
             "--cloud-index-prefetch-buffer",
             "(-CIPB)  Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer if unset.  Default value: -1. ",
         ),
         arg(
             "cloudPrefetchBuffer",
             Int(optional=True),
             "--cloud-prefetch-buffer",
             "(-CPB)  Size of the cloud-only prefetch buffer (in MB; 0 to disable).  Default value: 40. ",
         ),
         arg(
             "createOutputBamIndex",
             Boolean(optional=True),
             "--create-output-bam-index",
             "(-OBI)  If true, create a BAM/CRAM index when writing a coordinate-sorted BAM/CRAM file.  Default value: true. Possible values: {true, false} ",
             default=True,
         ),
         arg(
             "createOutputBamMd5",
             Boolean(optional=True),
             "--create-output-bam-md5",
             "(-OBM)  If true, create a MD5 digest for any BAM/SAM/CRAM file created  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "createOutputVariantIndex",
             Boolean(optional=True),
             "--create-output-variant-index",
             "(-OVI)  If true, create a VCF index when writing a coordinate-sorted VCF file.  Default value: true. Possible values: {true, false} ",
         ),
         arg(
             "createOutputVariantMd5",
             Boolean(optional=True),
             "--create-output-variant-md5",
             "(-OVM)  If true, create a a MD5 digest any VCF file created.  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "disableBamIndexCaching",
             Boolean(optional=True),
             "--disable-bam-index-caching",
             "(-DBIC)  If true, don't cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified.  Caching is automatically disabled if there are no intervals specified.  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "disableReadFilter",
             String(optional=True),
             "--disable-read-filter",
             "(-DF)  Read filters to be disabled before analysis  This argument may be specified 0 or more times. Default value: null. Possible Values: {AllowAllReadsReadFilter}",
         ),
         arg(
             "disableSequenceDictionaryValidation",
             Boolean(optional=True),
             "--disable-sequence-dictionary-validation",
             "(-disable-sequence-dictionary-validation)  If specified, do not check the sequence dictionaries from our inputs for compatibility. Use at your own risk!  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "doNotFixOverhangs",
             Boolean(optional=True),
             "--do-not-fix-overhangs",
             "Default value: false. Possible values: {true, false} ",
         ),
         # Takes interval values, not a bare flag.
         arg(
             "excludeIntervals",
             String(optional=True),
             "--exclude-intervals",
             "(-XL) This argument may be specified 0 or more times. Default value: null. ",
         ),
         arg(
             "gatkConfigFile",
             String(optional=True),
             "--gatk-config-file",
             "A configuration file to use with the GATK. Default value: null.",
         ),
         arg(
             "gcsMaxRetries",
             Int(optional=True),
             "--gcs-max-retries",
             "(-gcs-retries)  If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection  Default value: 20. ",
         ),
         arg(
             "gcsProjectForRequesterPays",
             String(optional=True),
             "--gcs-project-for-requester-pays",
             " Project to bill when accessing 'requester pays' buckets. If unset, these buckets cannot be accessed.  Default value: . ",
         ),
         arg(
             "help",
             Boolean(optional=True),
             "--help",
             "(-h) display the help message Default value: false. Possible values: {true, false}",
         ),
         arg(
             "intervalExclusionPadding",
             Int(optional=True),
             "--interval-exclusion-padding",
             "(-ixp)  Amount of padding (in bp) to add to each interval you are excluding.  Default value: 0. ",
         ),
         # Enum-valued (ALL / OVERLAPPING_ONLY), so it must carry a value.
         arg(
             "intervalMergingRule",
             String(optional=True),
             "--interval-merging-rule",
             "(-imr)  Interval merging rule for abutting intervals  Default value: ALL. Possible values: {ALL, OVERLAPPING_ONLY} ",
         ),
         # Integer-valued (default 0).
         arg(
             "intervalPadding",
             Int(optional=True),
             "--interval-padding",
             "(-ip) Default value: 0.",
         ),
         # Enum-valued (UNION / INTERSECTION).
         arg(
             "intervalSetRule",
             String(optional=True),
             "--interval-set-rule",
             "(-isr)  Set merging approach to use for combining interval inputs  Default value: UNION. Possible values: {UNION, INTERSECTION} ",
         ),
         arg(
             "intervals",
             String(optional=True),
             "--intervals",
             "(-L) One or more genomic intervals over which to operate This argument may be specified 0 or more times. Default value: null. ",
         ),
         arg(
             "lenient",
             Boolean(optional=True),
             "--lenient",
             "(-LE) Lenient processing of VCF files Default value: false. Possible values: {true, false}",
         ),
         arg(
             "maxBasesInOverhang",
             Int(optional=True),
             "--max-bases-in-overhang",
             " max number of bases allowed in the overhang  Default value: 40. ",
         ),
         arg(
             "maxMismatchesInOverhang",
             Int(optional=True),
             "--max-mismatches-in-overhang",
             " max number of mismatches allowed in the overhang  Default value: 1. ",
         ),
         arg(
             "processSecondaryAlignments",
             Boolean(optional=True),
             "--process-secondary-alignments",
             " have the walker split secondary alignments (will still repair MC tag without it)  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "quiet",
             Boolean(optional=True),
             "--QUIET",
             "Whether to suppress job-summary info on System.err. Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "readFilter",
             String(optional=True),
             "--read-filter",
             "(-RF) Read filters to be applied before analysis This argument may be specified 0 or more times. Default value: null. Possible Values: {AlignmentAgreesWithHeaderReadFilter, AllowAllReadsReadFilter, AmbiguousBaseReadFilter, CigarContainsNoNOperator, FirstOfPairReadFilter, FragmentLengthReadFilter, GoodCigarReadFilter, HasReadGroupReadFilter, IntervalOverlapReadFilter, LibraryReadFilter, MappedReadFilter, MappingQualityAvailableReadFilter, MappingQualityNotZeroReadFilter, MappingQualityReadFilter, MatchingBasesAndQualsReadFilter, MateDifferentStrandReadFilter, MateOnSameContigOrNoMappedMateReadFilter, MateUnmappedAndUnmappedReadFilter, MetricsReadFilter, NonChimericOriginalAlignmentReadFilter, NonZeroFragmentLengthReadFilter, NonZeroReferenceLengthAlignmentReadFilter, NotDuplicateReadFilter, NotOpticalDuplicateReadFilter, NotSecondaryAlignmentReadFilter, NotSupplementaryAlignmentReadFilter, OverclippedReadFilter, PairedReadFilter, PassesVendorQualityCheckReadFilter, PlatformReadFilter, PlatformUnitReadFilter, PrimaryLineReadFilter, ProperlyPairedReadFilter, ReadGroupBlackListReadFilter, ReadGroupReadFilter, ReadLengthEqualsCigarLengthReadFilter, ReadLengthReadFilter, ReadNameReadFilter, ReadStrandFilter, SampleReadFilter, SecondOfPairReadFilter, SeqIsStoredReadFilter, SoftClippedReadFilter, ValidAlignmentEndReadFilter, ValidAlignmentStartReadFilter, WellformedReadFilter}",
         ),
         arg(
             "readIndex",
             String(optional=True),
             "--read-index",
             "(-read-index)  Indices to use for the read inputs. If specified, an index must be provided for every read input and in the same order as the read inputs. If this argument is not specified, the path to the index for each input will be inferred automatically.  This argument may be specified 0 or more times. Default value: null. ",
         ),
         # Enum-valued (STRICT / LENIENT / SILENT).
         arg(
             "readValidationStringency",
             String(optional=True),
             "--read-validation-stringency",
             "(-VS)  Validation stringency for all SAM/BAM/CRAM/SRA files read by this program.  The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded.  Default value: SILENT. Possible values: {STRICT, LENIENT, SILENT} ",
         ),
         arg(
             "refactorCigarString",
             Boolean(optional=True),
             "--refactor-cigar-string",
             "(-fixNDN)  refactor cigar string with NDN elements to one element  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "secondsBetweenProgressUpdates",
             Double(optional=True),
             "--seconds-between-progress-updates",
             "(-seconds-between-progress-updates)  Output traversal statistics every time this many seconds elapse  Default value: 10.0. ",
         ),
         arg(
             "sequenceDictionary",
             String(optional=True),
             "--sequence-dictionary",
             "(-sequence-dictionary)  Use the given sequence dictionary as the master/canonical sequence dictionary.  Must be a .dict file.  Default value: null. ",
         ),
         arg(
             "sitesOnlyVcfOutput",
             Boolean(optional=True),
             "--sites-only-vcf-output",
             " If true, don't emit genotype fields when writing vcf file output.  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "skipMappingQualityTransform",
             Boolean(optional=True),
             "--skip-mapping-quality-transform",
             "(-skip-mq-transform)  skip the 255 -> 60 MQ read transform  Default value: false. Possible values: {true, false}",
         ),
         arg(
             "tmpDir",
             String(optional=True),
             "--tmp-dir",
             "Temp directory to use. Default value: null.",
             default="tmp/",
         ),
         arg(
             "useJdkDeflater",
             Boolean(optional=True),
             "--use-jdk-deflater",
             "(-jdk-deflater)  Whether to use the JdkDeflater (as opposed to IntelDeflater)  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "useJdkInflater",
             Boolean(optional=True),
             "--use-jdk-inflater",
             "(-jdk-inflater)  Whether to use the JdkInflater (as opposed to IntelInflater)  Default value: false. Possible values: {true, false} ",
         ),
         # Enum-valued (ERROR / WARNING / INFO / DEBUG).
         arg(
             "verbosity",
             String(optional=True),
             "--verbosity",
             "(-verbosity)  Control verbosity of logging.  Default value: INFO. Possible values: {ERROR, WARNING, INFO, DEBUG} ",
         ),
         arg(
             "version",
             Boolean(optional=True),
             "--version",
             "display the version number for this tool Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "disableToolDefaultReadFilters",
             Boolean(optional=True),
             "--disable-tool-default-read-filters",
             "(-disable-tool-default-read-filters)  Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)  Default value: false. Possible values: {true, false} ",
         ),
         # Integer-valued (default 150000).
         arg(
             "maxReadsInMemory",
             Int(optional=True),
             "--max-reads-in-memory",
             "Default value: 150000.",
         ),
         arg(
             "showhidden",
             Boolean(optional=True),
             "--showHidden",
             "(-showHidden)  display hidden arguments  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "ambigFilterBases",
             Int(optional=True),
             "--ambig-filter-bases",
             "Threshold number of ambiguous bases. If null, uses threshold fraction; otherwise, overrides threshold fraction.  Default value: null.  Cannot be used in conjuction with argument(s) maxAmbiguousBaseFraction",
         ),
         arg(
             "ambigFilterFrac",
             Double(optional=True),
             "--ambig-filter-frac",
             "Threshold fraction of ambiguous bases Default value: 0.05. Cannot be used in conjuction with argument(s) maxAmbiguousBases",
         ),
         # Integer-valued (default 1000000).
         arg(
             "maxFragmentLength",
             Int(optional=True),
             "--max-fragment-length",
             "Default value: 1000000.",
         ),
         # Integer-valued (default 0).
         arg(
             "minFragmentLength",
             Int(optional=True),
             "--min-fragment-length",
             "Default value: 0.",
         ),
         arg(
             "keepIntervals",
             String(optional=True),
             "--keep-intervals",
             "One or more genomic intervals to keep This argument must be specified at least once. Required. ",
         ),
         arg(
             "library",
             String(optional=True),
             "--library",
             "(-library) Name of the library to keep This argument must be specified at least once. Required.",
         ),
         arg(
             "maximumMappingQuality",
             Int(optional=True),
             "--maximum-mapping-quality",
             " Maximum mapping quality to keep (inclusive)  Default value: null. ",
         ),
         arg(
             "minimumMappingQuality",
             Int(optional=True),
             "--minimum-mapping-quality",
             " Minimum mapping quality to keep (inclusive)  Default value: 10. ",
         ),
         arg(
             "dontRequireSoftClipsBothEnds",
             Boolean(optional=True),
             "--dont-require-soft-clips-both-ends",
             " Allow a read to be filtered out based on having only 1 soft-clipped block. By default, both ends must have a soft-clipped block, setting this flag requires only 1 soft-clipped block  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "filterTooShort",
             Int(optional=True),
             "--filter-too-short",
             "Minimum number of aligned bases Default value: 30.",
         ),
         # Takes a platform name, not a bare flag.
         arg(
             "platformFilterName",
             String(optional=True),
             "--platform-filter-name",
             "This argument must be specified at least once. Required.",
         ),
         arg(
             "blackListedLanes",
             String(optional=True),
             "--black-listed-lanes",
             "Platform unit (PU) to filter out This argument must be specified at least once. Required.",
         ),
         # Takes read-group values, not a bare flag.
         arg(
             "readGroupBlackList",
             String(optional=True),
             "--read-group-black-list",
             "This argument must be specified at least once. Required. ",
         ),
         arg(
             "keepReadGroup",
             String(optional=True),
             "--keep-read-group",
             "The name of the read group to keep Required.",
         ),
         arg(
             "maxReadLength",
             Int(optional=True),
             "--max-read-length",
             "Keep only reads with length at most equal to the specified value Required.",
         ),
         arg(
             "minReadLength",
             Int(optional=True),
             "--min-read-length",
             "Keep only reads with length at least equal to the specified value Default value: 1.",
         ),
         arg(
             "readName",
             String(optional=True),
             "--read-name",
             "Keep only reads with this read name Required.",
         ),
         arg(
             "keepReverseStrandOnly",
             Boolean(optional=True),
             "--keep-reverse-strand-only",
             " Keep only reads on the reverse strand  Required. Possible values: {true, false} ",
         ),
         arg(
             "sample",
             String(optional=True),
             "--sample",
             "(-sample) The name of the sample(s) to keep, filtering out all others This argument must be specified at least once. Required. ",
         ),
         arg(
             "invertSoftClipRatioFilter",
             Boolean(optional=True),
             "--invert-soft-clip-ratio-filter",
             " Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.  Default value: false. Possible values: {true, false} ",
         ),
         arg(
             "softClippedLeadingTrailingRatio",
             Double(optional=True),
             "--soft-clipped-leading-trailing-ratio",
             " Threshold ratio of soft clipped bases (leading / trailing the cigar string) to total bases in read for read to be filtered.  Default value: null.  Cannot be used in conjuction with argument(s) minimumSoftClippedRatio",
         ),
         arg(
             "softClippedRatioThreshold",
             Double(optional=True),
             "--soft-clipped-ratio-threshold",
             " Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in read for read to be filtered.  Default value: null.  Cannot be used in conjuction with argument(s) minimumLeadingTrailingSoftClippedRatio",
         ),
     ]
Ejemplo n.º 7
0
 def inputs(self):
     """Return the command-line inputs exposed for this tool.

     Declared, in order: the ``samples`` CSV, the optional ``labels`` CSV,
     the ``destination`` string (default ``"."``) and four optional boolean
     switches (``verbose``, ``comparison``, ``force``, ``parents``).

     Not exposed here (their definitions were left disabled): ``--help``,
     ``-test``, ``-train``, ``-model_dir``, ``-njobs`` and ``-cv``.
     """

     def described(text):
         # Wrap help text in the documentation object every input carries.
         return InputDocumentation(doc=text)

     def switch(tag, text):
         # Optional boolean input whose prefix is "-" followed by its tag.
         return ToolInput(
             tag=tag,
             input_type=Boolean(optional=True),
             prefix="-" + tag,
             separate_value_from_prefix=True,
             doc=described(text),
         )

     samples = ToolInput(
         tag="samples",
         input_type=Csv(),
         prefix="-samples",
         separate_value_from_prefix=True,
         doc=described(
             "(-s)  Path to samples (rows) x genes (columns) csv file representing a raw counts matrix. Note: hg19 only supported currently, use other references at own risk."
         ),
     )
     labels = ToolInput(
         tag="labels",
         input_type=Csv(optional=True),
         prefix="-labels",
         separate_value_from_prefix=True,
         doc=described(
             "(-l)  (Optional) Path to samples true labels. CSV with samples (rows) x [sample id, label] (cols). This will enable re-labelling mode. Note: labels must reflect naming conventions used within this tool. View the ALLSorts GitHub Wiki for further details."
         ),
     )
     destination = ToolInput(
         tag="destination",
         input_type=String(optional=True),
         prefix="-destination",
         default=".",
         separate_value_from_prefix=True,
         doc=described(
             "(-d)  Path to where you want the final report to be saved."
         ),
     )

     return [
         samples,
         labels,
         destination,
         switch(
             "verbose",
             "(-v) (flag, default=False) Verbose. Print stage progress.",
         ),
         switch(
             "comparison",
             "Rebuild comparisons for labelled visualisations.",
         ),
         switch(
             "force",
             "(-f) (flag, default=False) Force. Bypass warnings without user confirmation.",
         ),
         switch(
             "parents",
             "(-p) Include parent meta-subtypes in predictions. Note: This may remove previously unclassified samples.",
         ),
     ]
Ejemplo n.º 8
0
    def constructor(self):

        self.input(
            "normal_inputs",
            Array(FastqGzPair),
            doc=InputDocumentation(
                "An array of NORMAL FastqGz pairs. These are aligned separately and merged to create higher depth coverages from multiple sets of reads",
                quality=InputQualityType.user,
                example='["normal_R1.fastq.gz", "normal_R2.fastq.gz"]',
            ),
        )
        self.input(
            "tumor_inputs",
            Array(FastqGzPair),
            doc=InputDocumentation(
                "An array of TUMOR FastqGz pairs. These are aligned separately and merged to create higher depth coverages from multiple sets of reads",
                quality=InputQualityType.user,
                example='["tumor_R1.fastq.gz", "tumor_R2.fastq.gz"]',
            ),
        )

        self.input(
            "normal_name",
            String(),
            doc=InputDocumentation(
                "Sample name for the NORMAL sample from which to generate the readGroupHeaderLine for BwaMem",
                quality=InputQualityType.user,
                example="NA24385_normal",
            ),
        )
        self.input(
            "tumor_name",
            String(),
            doc=InputDocumentation(
                "Sample name for the TUMOR sample from which to generate the readGroupHeaderLine for BwaMem",
                quality=InputQualityType.user,
                example="NA24385_tumor",
            ),
        )

        self.input(
            "cutadapt_adapters",
            File(optional=True),
            doc=InputDocumentation(
                "Specifies a containment list for cutadapt, which contains a list of sequences to determine valid overrepresented sequences from "
                "the FastQC report to trim with Cuatadapt. The file must contain sets of named adapters in the form: "
                "``name[tab]sequence``. Lines prefixed with a hash will be ignored.",
                quality=InputQualityType.static,
                example=
                "https://github.com/csf-ngs/fastqc/blob/master/Contaminants/contaminant_list.txt",
            ),
        )
        self.input(
            "gatk_intervals",
            Array(Bed),
            doc=InputDocumentation(
                "List of intervals over which to split the GATK variant calling",
                quality=InputQualityType.static,
                example="BRCA1.bed",
            ),
        )

        self.input(
            "reference",
            FastaWithDict,
            doc=InputDocumentation(
                """\
The reference genome from which to align the reads. This requires a number indexes (can be generated \
with the 'IndexFasta' pipeline This pipeline has been tested using the HG38 reference set.

This pipeline expects the assembly references to be as they appear in the GCP example:

- (".fai", ".amb", ".ann", ".bwt", ".pac", ".sa", "^.dict").""",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.fasta",
            ),
        )

        self.input(
            "snps_dbsnp",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "(WARNING: The file available from the genomics-public-data resource on Google Cloud Storage is NOT compressed and indexed. This will need to be completed prior to starting the pipeline.\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.gz",
            ),
        )
        self.input(
            "snps_1000gp",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz",
            ),
        )
        self.input(
            "known_indels",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz",
            ),
        )
        self.input(
            "mills_indels",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
            ),
        )

        self.step(
            "tumor",
            self.process_subpipeline(
                reads=self.tumor_inputs,
                sample_name=self.tumor_name,
                reference=self.reference,
                cutadapt_adapters=self.cutadapt_adapters,
            ),
        )
        self.step(
            "normal",
            self.process_subpipeline(
                reads=self.normal_inputs,
                sample_name=self.normal_name,
                reference=self.reference,
                cutadapt_adapters=self.cutadapt_adapters,
            ),
        )

        self.step(
            "vc_gatk",
            GatkSomaticVariantCaller_4_1_3(
                normal_bam=self.normal.out,
                tumor_bam=self.tumor.out,
                normal_name=self.normal_name,
                tumor_name=self.tumor_name,
                intervals=self.gatk_intervals,
                reference=self.reference,
                snps_dbsnp=self.snps_dbsnp,
                snps_1000gp=self.snps_1000gp,
                known_indels=self.known_indels,
                mills_indels=self.mills_indels,
            ),
            scatter="intervals",
        )

        self.step("vc_gatk_merge", Gatk4GatherVcfs_4_1_3(vcfs=self.vc_gatk))
        self.step("sorted", BcfToolsSort_1_9(vcf=self.vc_gatk_merge.out))

        # Outputs

        self.output(
            "normal_bam",
            source=self.normal.out,
            output_folder="bams",
            output_name=self.normal_name,
        )

        self.output(
            "tumor_bam",
            source=self.tumor.out,
            output_folder="bams",
            output_name=self.tumor_name,
        )
        self.output("normal_report",
                    source=self.normal.reports,
                    output_folder="reports")
        self.output("tumor_report",
                    source=self.tumor.reports,
                    output_folder="reports")

        self.output(
            "variants",
            source=self.sorted.out,
            output_folder="variants",
            doc="Merged variants from the GATK caller",
        )
        self.output(
            "variants_split",
            source=self.vc_gatk.out,
            output_folder=["variants", "byInterval"],
            doc="Unmerged variants from the GATK caller (by interval)",
        )
Ejemplo n.º 9
0
 def inputs(self):
     """Return the inherited inputs followed by this tool's own inputs.

     The tool-specific inputs cover the scattered BQSR report files
     (``reports``), the generated output file name and the common GATK
     options. ``tmpDir`` and ``verbosity`` are typed as optional strings
     because their help text describes a directory and a log-level name
     respectively; a boolean type could only ever emit the bare flag.
     """
     return [
         *super().inputs(),
         ToolInput(
             tag="reports",
             input_type=Array(Tsv, optional=True),
             prefix="--input",
             separate_value_from_prefix=True,
             # Repeat "--input" in front of every report in the array.
             prefix_applies_to_all_elements=True,
             doc=InputDocumentation(
                 doc="(-I) List of scattered BQSR report files This argument must be specified at least once. Required. "
             ),
         ),
         ToolInput(
             tag="outputFilename",
             input_type=Filename(suffix=".recal_data", extension=".tsv"),
             prefix="--output",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-O) File to output the gathered file to Required."
             ),
         ),
         ToolInput(
             tag="arguments_file",
             input_type=File(optional=True),
             prefix="--arguments_file",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="read one or more arguments files and add them to the command line This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         ToolInput(
             tag="gatkConfigFile",
             input_type=String(optional=True),
             prefix="--gatk-config-file",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="A configuration file to use with the GATK. Default value: null."
             ),
         ),
         ToolInput(
             tag="gcsMaxRetries",
             input_type=Int(optional=True),
             prefix="--gcs-max-retries",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-gcs-retries)  If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection  Default value: 20. "
             ),
         ),
         ToolInput(
             tag="gcsProjectForRequesterPays",
             input_type=String(optional=True),
             prefix="--gcs-project-for-requester-pays",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=" Project to bill when accessing 'requester pays' buckets. If unset, these buckets cannot be accessed.  Default value: . "
             ),
         ),
         ToolInput(
             tag="help",
             input_type=Boolean(optional=True),
             prefix="--help",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-h) display the help message Default value: false. Possible values: {true, false}"
             ),
         ),
         ToolInput(
             tag="quiet",
             input_type=Boolean(optional=True),
             prefix="--QUIET",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Whether to suppress job-summary info on System.err. Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="tmpDir",
             # A directory is expected, so this takes a value rather than
             # acting as a boolean flag.
             input_type=String(optional=True),
             prefix="--tmp-dir",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Temp directory to use. Default value: null."
             ),
         ),
         ToolInput(
             tag="useJdkDeflater",
             input_type=Boolean(optional=True),
             prefix="--use-jdk-deflater",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-jdk-deflater)  Whether to use the JdkDeflater (as opposed to IntelDeflater)  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="useJdkInflater",
             input_type=Boolean(optional=True),
             prefix="--use-jdk-inflater",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-jdk-inflater)  Whether to use the JdkInflater (as opposed to IntelInflater)  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="verbosity",
             # One of ERROR / WARNING / INFO / DEBUG must be supplied, so
             # the value is a string, not a boolean flag.
             input_type=String(optional=True),
             prefix="--verbosity",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-verbosity)  Control verbosity of logging.  Default value: INFO. Possible values: {ERROR, WARNING, INFO, DEBUG} "
             ),
         ),
         ToolInput(
             tag="version",
             input_type=Boolean(optional=True),
             prefix="--version",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="display the version number for this tool Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="showhidden",
             input_type=Boolean(optional=True),
             prefix="--showHidden",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-showHidden)  display hidden arguments  Default value: false. Possible values: {true, false} "
             ),
         ),
     ]
Ejemplo n.º 10
0
 def inputs(self):
     """Return the inherited inputs followed by this tool's own inputs.

     The tool-specific inputs cover the BAM files to merge (``bams``), the
     generated output BAM name and the common options of the tool.
     ``ga4gh_client_secrets``, ``validation_stringency`` and ``verbosity``
     are typed as optional strings because their help text describes a
     file name or an enumerated value; a boolean type could only ever emit
     the bare flag.
     """
     return [
         *super().inputs(),
         ToolInput(
             tag="bams",
             input_type=Array(Bam, optional=True),
             prefix="--INPUT",
             separate_value_from_prefix=True,
             # Repeat "--INPUT" in front of every BAM in the array.
             prefix_applies_to_all_elements=True,
             doc=InputDocumentation(
                 doc="(-I) Two or more BAM files or text files containing lists of BAM files (one per line). This argument must be specified at least once. Required. "
             ),
         ),
         ToolInput(
             tag="outputFilename",
             input_type=Filename(suffix=".merged", extension=".bam"),
             prefix="--OUTPUT",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-O) The output BAM file to write to. Required."
             ),
         ),
         ToolInput(
             tag="arguments_file",
             input_type=File(optional=True),
             prefix="--arguments_file",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="read one or more arguments files and add them to the command line This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         # NOTE: --COMPRESSION_LEVEL (Int) is not exposed; its definition
         # was left disabled here.
         ToolInput(
             tag="create_index",
             input_type=Boolean(optional=True),
             prefix="--CREATE_INDEX",
             separate_value_from_prefix=True,
             # Defaults to True here, unlike the tool default quoted in
             # the help text below.
             default=True,
             doc=InputDocumentation(
                 doc="Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="create_md5_file",
             input_type=Boolean(optional=True),
             prefix="--CREATE_MD5_FILE",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="ga4gh_client_secrets",
             # The documented default is a file name, so this takes a
             # value rather than acting as a boolean flag.
             input_type=String(optional=True),
             prefix="--GA4GH_CLIENT_SECRETS",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Default value: client_secrets.json."
             ),
         ),
         ToolInput(
             tag="help",
             input_type=Boolean(optional=True),
             prefix="--help",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-h) display the help message Default value: false. Possible values: {true, false}"
             ),
         ),
         ToolInput(
             tag="max_records_in_ram",
             input_type=Int(optional=True),
             prefix="--MAX_RECORDS_IN_RAM",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="When writing files that need to be sorted, this will specify the number of records stored in RAM before spilling to disk. Increasing this number reduces the number of file handles needed to sort the file, and increases the amount of RAM needed.  Default value: 500000. "
             ),
         ),
         ToolInput(
             tag="quiet",
             input_type=Boolean(optional=True),
             prefix="--QUIET",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Whether to suppress job-summary info on System.err. Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="reference_sequence",
             input_type=File(optional=True),
             prefix="--REFERENCE_SEQUENCE",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-R) Reference sequence file. Default value: null."
             ),
         ),
         ToolInput(
             tag="tmp_dir",
             # NOTE(review): the help text describes one or more
             # directories, but the input is typed as a single File -
             # confirm the intended type.
             input_type=File(optional=True),
             prefix="--TMP_DIR",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="One or more directories with space available to be used by this program for temporary storage of working files  This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         ToolInput(
             tag="use_jdk_deflater",
             input_type=Boolean(optional=True),
             prefix="--USE_JDK_DEFLATER",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-use_jdk_deflater)  Use the JDK Deflater instead of the Intel Deflater for writing compressed output  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="use_jdk_inflater",
             input_type=Boolean(optional=True),
             prefix="--USE_JDK_INFLATER",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-use_jdk_inflater)  Use the JDK Inflater instead of the Intel Inflater for reading compressed input  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="validation_stringency",
             # One of STRICT / LENIENT / SILENT must be supplied, so the
             # value is a string, not a boolean flag.
             input_type=String(optional=True),
             prefix="--VALIDATION_STRINGENCY",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=" Validation stringency for all SAM files read by this program.  Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded.  Default value: STRICT. Possible values: {STRICT, LENIENT, SILENT} "
             ),
         ),
         ToolInput(
             tag="verbosity",
             # One of ERROR / WARNING / INFO / DEBUG must be supplied, so
             # the value is a string, not a boolean flag.
             input_type=String(optional=True),
             prefix="--VERBOSITY",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Control verbosity of logging. Default value: INFO. Possible values: {ERROR, WARNING, INFO, DEBUG} "
             ),
         ),
         ToolInput(
             tag="version",
             input_type=Boolean(optional=True),
             prefix="--version",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="display the version number for this tool Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="showhidden",
             input_type=Boolean(optional=True),
             prefix="--showHidden",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-showHidden)  display hidden arguments  Default value: false. Possible values: {true, false} "
             ),
         ),
     ]
Ejemplo n.º 11
0
 def inputs(self):
     return [
         *super().inputs(),
         ToolInput(
             tag="inp",
             input_type=Bam(),
             prefix="--INPUT",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-I) Input file (SAM or BAM) to extract reads from. Required."
             ),
         ),
         ToolInput(
             tag="outputFilename",
             input_type=Filename(
                 prefix=InputSelector("inp", remove_file_extension=True),
                 extension=".bam",
             ),
             prefix="--OUTPUT",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-O) Output file (SAM or BAM) to write extracted reads to. Required."
             ),
         ),
         # since gatk 4.1.4.0 the reference option replace with reference dictionary
         ToolInput(
             tag="sequence_dictionary",
             # tag="reference",
             input_type=File(),
             # prefix="--REFERENCE",
             prefix="--SEQUENCE_DICTIONARY",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 "A Sequence Dictionary for the OUTPUT file (can be read from one of the following file types (SAM, BAM, VCF, BCF, Interval List, Fasta, or Dict)"
             ),
             # doc=InputDocumentation(
             #     doc="(-R) Reference sequence to reorder reads to match. A sequence dictionary corresponding to the reference fasta is required.  Create one with CreateSequenceDictionary.  Required. "
             # ),
         ),
         ToolInput(
             tag="allow_contig_length_discordance",
             input_type=Boolean(optional=True),
             prefix="--ALLOW_CONTIG_LENGTH_DISCORDANCE",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-U)  If true, then permits mapping from a read contig to a new reference contig with the same name but a different length.  Highly dangerous, only use if you know what you are doing.  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="allow_incomplete_dict_concordance",
             input_type=Boolean(optional=True),
             prefix="--ALLOW_INCOMPLETE_DICT_CONCORDANCE",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-S)  If true, then allows only a partial overlap of the original contigs with the new reference sequence contigs.  By default, this tool requires a corresponding contig in the new reference for each read contig  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="arguments_file",
             input_type=File(optional=True),
             prefix="--arguments_file",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="read one or more arguments files and add them to the command line This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         # ToolInput(
         #     tag="compression_level",
         #     input_type=Int(optional=True),
         #     prefix="--COMPRESSION_LEVEL",
         #     separate_value_from_prefix=True,
         #     doc=InputDocumentation(
         #         doc="Compression level for all compressed files created (e.g. BAM and VCF). Default value: 2."
         #     ),
         # ),
         ToolInput(
             tag="create_index",
             input_type=Boolean(optional=True),
             default=True,
             prefix="--CREATE_INDEX",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="create_md5_file",
             input_type=Boolean(optional=True),
             prefix="--CREATE_MD5_FILE",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="ga4gh_client_secrets",
             input_type=Boolean(optional=True),
             prefix="--GA4GH_CLIENT_SECRETS",
             separate_value_from_prefix=True,
             doc=InputDocumentation(doc="Default value: client_secrets.json."),
         ),
         ToolInput(
             tag="help",
             input_type=Boolean(optional=True),
             prefix="--help",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-h) display the help message Default value: false. Possible values: {true, false}"
             ),
         ),
         ToolInput(
             tag="max_records_in_ram",
             input_type=Int(optional=True),
             prefix="--MAX_RECORDS_IN_RAM",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="When writing files that need to be sorted, this will specify the number of records stored in RAM before spilling to disk. Increasing this number reduces the number of file handles needed to sort the file, and increases the amount of RAM needed.  Default value: 500000. "
             ),
         ),
         ToolInput(
             tag="quiet",
             input_type=Boolean(optional=True),
             prefix="--QUIET",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Whether to suppress job-summary info on System.err. Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="tmp_dir",
             input_type=File(optional=True),
             prefix="--TMP_DIR",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="One or more directories with space available to be used by this program for temporary storage of working files  This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         ToolInput(
             tag="use_jdk_deflater",
             input_type=Boolean(optional=True),
             prefix="--USE_JDK_DEFLATER",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-use_jdk_deflater)  Use the JDK Deflater instead of the Intel Deflater for writing compressed output  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="use_jdk_inflater",
             input_type=Boolean(optional=True),
             prefix="--USE_JDK_INFLATER",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-use_jdk_inflater)  Use the JDK Inflater instead of the Intel Inflater for reading compressed input  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="validation_stringency",
             input_type=Boolean(optional=True),
             prefix="--VALIDATION_STRINGENCY",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=" Validation stringency for all SAM files read by this program.  Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded.  Default value: STRICT. Possible values: {STRICT, LENIENT, SILENT} "
             ),
         ),
         ToolInput(
             tag="verbosity",
             input_type=Boolean(optional=True),
             prefix="--VERBOSITY",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Control verbosity of logging. Default value: INFO. Possible values: {ERROR, WARNING, INFO, DEBUG} "
             ),
         ),
         ToolInput(
             tag="version",
             input_type=Boolean(optional=True),
             prefix="--version",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="display the version number for this tool Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="showhidden",
             input_type=Boolean(optional=True),
             prefix="--showHidden",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-showHidden)  display hidden arguments  Default value: false. Possible values: {true, false} "
             ),
         ),
     ]
Ejemplo n.º 12
0
 def inputs(self):
     """Return the command-line inputs of the Arriba fusion caller.

     Each entry is a ``ToolInput`` that maps a tag onto one Arriba option
     (``-x``, ``-c``, ``-g`` ...). The per-option help text is carried in
     the attached ``InputDocumentation``.

     Returns:
         A list of ``ToolInput`` declarations, in the order they are
         declared below.
     """
     return [
         ToolInput(
             tag="aligned_inp",
             input_type=Bam(),
             prefix="-x",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="File in SAM/BAM/CRAM format with main alignments as generated by STAR (Aligned.out.sam). "
                 "Arriba extracts candidate reads from this file. This is sometimes /dev/stdin"
             ),
         ),
         ToolInput(
             tag="inp_chimeric",
             input_type=Bam(optional=True),
             prefix="-c",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="File in SAM/BAM/CRAM format with chimeric alignments as generated by STAR (Chimeric.out.sam). "
                 "This parameter is only required, if STAR was run with the parameter "
                 "'--chimOutType SeparateSAMold'. When STAR was run with the parameter "
                 "'--chimOutType WithinBAM', it suffices to pass the parameter -x to Arriba and -c can be omitted. "
             ),
         ),
         ToolInput(
             tag="gtf_file",
             input_type=File(optional=True),
             prefix="-g",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="GTF file with gene annotation. The file may be gzip-compressed."
             ),
         ),
         ToolInput(
             tag="gtf_features",
             input_type=Csv(optional=True),
             prefix="-G",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Comma-/space-separated list of names of GTF features. "
                 "Default: gene_name=gene_name|gene_id gene_id=gene_id transcript_id=transcript_id feature_exon=exon feature_CDS=CDS "
             ),
         ),
         ToolInput(
             tag="reference",
             input_type=Fasta(optional=True),
             prefix="-a",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="FastA file with genome sequence (assembly). The file may be gzip-compressed. An index with "
                 "the file extension .fai must exist only if CRAM files are processed. "
             ),
         ),
         ToolInput(
             tag="blacklist",
             input_type=File(optional=True),
             prefix="-b",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="File containing blacklisted events (recurrent artifacts and transcripts observed in healthy tissue). "
             ),
         ),
         ToolInput(
             tag="known_fusions",
             input_type=Tsv(optional=True),
             prefix="-k",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="File containing known/recurrent fusions. Some cancer entities are often characterized by "
                 "fusions between the same pair of genes. In order to boost sensitivity, a list of known "
                 "fusions can be supplied using this parameter. The list must contain two columns with the "
                 "names of the fused genes, separated by tabs. "
             ),
         ),
         ToolInput(
             tag="output_filename",
             input_type=Filename(extension=".tsv"),
             prefix="-o",
             default="fusions.tsv",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Output file with fusions that have passed all filters."
             ),
         ),
         ToolInput(
             tag="discarded_output_filename",
             input_type=Filename(suffix=".discarded", extension=".tsv"),
             prefix="-O",
             separate_value_from_prefix=True,
             default="fusions.discarded.tsv",
             doc=InputDocumentation(
                 doc="Output file with fusions that were discarded due to filtering."
             ),
         ),
         ToolInput(
             tag="structural_variants_coordinates",
             input_type=Tsv(optional=True),
             prefix="-d",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Tab-separated file with coordinates of structural variants found using whole-genome "
                 "sequencing data. These coordinates serve to increase sensitivity towards weakly expressed "
                 "fusions and to eliminate fusions with low evidence. "
             ),
         ),
         ToolInput(
             tag="max_genomic_breakpoint_distance",
             input_type=Int(optional=True),
             prefix="-D",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="When a file with genomic breakpoints obtained via whole-genome sequencing is supplied via "
                 "the -d parameter, this parameter determines how far a genomic breakpoint may be away from a "
                 "transcriptomic breakpoint to consider it as a related event. For events inside genes, the "
                 "distance is added to the end of the gene; for intergenic events, the distance threshold is "
                 "applied as is. Default: 100000 "
             ),
         ),
         ToolInput(
             tag="strandedness",
             input_type=String(optional=True),
             prefix="-s",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Whether a strand-specific protocol was used for library preparation, and if so, the type of "
                 "strandedness (auto/yes/no/reverse). When unstranded data is processed, the strand can "
                 "sometimes be inferred from splice-patterns. But in unclear situations, stranded data helps"
                 " resolve ambiguities. Default: auto "
             ),
         ),
         ToolInput(
             tag="contigs",
             input_type=Array(String(), optional=True),
             prefix="-i",
             # -i expects ONE comma-/space-separated value (see the doc text
             # below), so join the array into a single argument instead of
             # emitting every contig as its own token -- the same reason the
             # `filters` input sets a separator.
             separator=",",
             doc=InputDocumentation(
                 doc="Comma-/space-separated list of interesting contigs. Fusions between genes on other contigs "
                 "are ignored. Contigs can be specified with or without the prefix 'chr'. "
                 "Default: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y "
             ),
         ),
         ToolInput(
             tag="filters",
             input_type=Array(String(), optional=True),
             prefix="-f",
             separator=" ",
             doc=InputDocumentation(
                 doc="Comma-/space-separated list of filters to disable. By default all filters are enabled. "
                 "Valid values: homopolymer, same_gene, inconsistently_clipped, duplicates, low_entropy, "
                 "no_genomic_support, short_anchor, homologs, blacklist, pcr_fusions, isoforms, intronic, "
                 "uninteresting_contigs, read_through, genomic_support, mismatches, no_coverage, spliced, "
                 "mismappers, merge_adjacent, select_best, many_spliced, long_gap, min_support, "
                 "relative_support, end_to_end, known_fusions, non_coding_neighbors, intragenic_exonic, "
                 "hairpin, small_insert_size "
             ),
         ),
         ToolInput(
             tag="max_e_value",
             input_type=Float(optional=True),
             prefix="-E",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Arriba estimates the number of fusions with a given number of supporting reads which one "
                 "would expect to see by random chance. If the expected number of fusions (e-value) is higher "
                 "than this threshold, the fusion is discarded by the 'relative_support' filter. Note: "
                 "Increasing this threshold can dramatically increase the number of false positives and may "
                 "increase the runtime of resource-intensive steps. Fractional values are possible. "
                 "Default: 0.300000 "
             ),
         ),
         ToolInput(
             tag="min_supporting_reads",
             input_type=Int(optional=True),
             prefix="-S",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="The 'min_support' filter discards all fusions with fewer than this many supporting reads "
                 "(split reads and discordant mates combined). Default: 2 "
             ),
         ),
         ToolInput(
             tag="max_mismappers",
             input_type=Float(optional=True),
             prefix="-m",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="When more than this fraction of supporting reads turns out to be mismappers, the "
                 "'mismappers' filter discards the fusion. Default: 0.800000 "
             ),
         ),
         ToolInput(
             tag="max_homolog_identity",
             input_type=Float(optional=True),
             prefix="-L",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Genes with more than the given fraction of sequence identity are considered homologs and "
                 "removed by the 'homologs' filter. Default: 0.300000 "
             ),
         ),
         ToolInput(
             tag="homopolymer_length",
             input_type=Int(optional=True),
             prefix="-H",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="The 'homopolymer' filter removes breakpoints adjacent to homopolymers of the given length "
                 "or more. Default: 6 "
             ),
         ),
         ToolInput(
             tag="read_through_distance",
             input_type=Int(optional=True),
             prefix="-R",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="The 'read_through' filter removes read-through fusions where the breakpoints are "
                 "less than the given distance away from each other. Default: 10000 "
             ),
         ),
         ToolInput(
             tag="min_anchor_length",
             input_type=Int(optional=True),
             prefix="-A",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Alignment artifacts are often characterized by split reads coming from only one gene "
                 "and no discordant mates. Moreover, the split reads only align to a short stretch in one "
                 "of the genes. The 'short_anchor' filter removes these fusions. This parameter sets the "
                 "threshold in bp for what the filter considers short. Default: 23 "
             ),
         ),
         ToolInput(
             tag="many_spliced_events",
             input_type=Int(optional=True),
             prefix="-M",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="The 'many_spliced' filter recovers fusions between genes that have at least this "
                 "many spliced breakpoints. Default: 4 "
             ),
         ),
         ToolInput(
             tag="max_kmer_content",
             input_type=Float(optional=True),
             prefix="-K",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="The 'low_entropy' filter removes reads with repetitive 3-mers. If the 3-mers make up more "
                 "than the given fraction of the sequence, then the read is discarded. Default: 0.600000 "
             ),
         ),
         ToolInput(
             tag="max_mismatch_pvalue",
             input_type=Float(optional=True),
             prefix="-V",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="The 'mismatches' filter uses a binomial model to calculate a p-value for observing a given "
                 "number of mismatches in a read. If the number of mismatches is too high, the read is "
                 "discarded. Default: 0.010000 "
             ),
         ),
         ToolInput(
             tag="fragment_length",
             input_type=Int(optional=True),
             prefix="-F",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="When paired-end data is given, the fragment length is estimated automatically and this "
                 "parameter has no effect. But when single-end data is given, the mean fragment length "
                 "should be specified to effectively filter fusions that arise from hairpin structures. "
                 "Default: 200 "
             ),
         ),
         ToolInput(
             tag="max_reads",
             input_type=Int(optional=True),
             prefix="-U",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Subsample fusions with more than the given number of supporting reads. This improves "
                 "performance without compromising sensitivity, as long as the threshold is high. Counting "
                 "of supporting reads beyond the threshold is inaccurate, obviously. Default: 300 "
             ),
         ),
         ToolInput(
             tag="quantile",
             input_type=Float(optional=True),
             prefix="-Q",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Highly expressed genes are prone to produce artifacts during library preparation. Genes "
                 "with an expression above the given quantile are eligible for filtering by the 'pcr_fusions' "
                 "filter. Default: 0.998000 "
             ),
         ),
         ToolInput(
             tag="exonic_fraction",
             input_type=Float(optional=True),
             prefix="-e",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="The breakpoints of false-positive predictions of intragenic events are often both in exons. "
                 "True predictions are more likely to have at least one breakpoint in an intron, because "
                 "introns are larger. If the fraction of exonic sequence between two breakpoints is smaller "
                 "than the given fraction, the 'intragenic_exonic' filter discards the event. Default: 0.200000"
             ),
         ),
         ToolInput(
             tag="fusion_transcript",
             input_type=Boolean(optional=True),
             prefix="-T",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="When set, the column 'fusion_transcript' is populated with the sequence of the fused genes "
                 "as assembled from the supporting reads. Specify the flag twice to also print the fusion "
                 "transcripts to the file containing discarded fusions (-O). Default: off "
             ),
         ),
         ToolInput(
             tag="peptide_sequence",
             input_type=Boolean(optional=True),
             prefix="-P",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="When set, the column 'peptide_sequence' is populated with the sequence of the fused proteins "
                 "as assembled from the supporting reads. Specify the flag twice to also print the peptide "
                 "sequence to the file containing discarded fusions (-O). Default: off "
             ),
         ),
         ToolInput(
             tag="read_identifiers",
             input_type=Boolean(optional=True),
             prefix="-I",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="When set, the column 'read_identifiers' is populated with identifiers of the reads which "
                 "support the fusion. The identifiers are separated by commas. Specify the flag twice to "
                 "also print the read identifiers to the file containing discarded fusions (-O). Default: off "
             ),
         ),
         # NOTE: the "-h" (print help and exit) option is intentionally not
         # exposed as an input.
     ]
Ejemplo n.º 13
0
 def inputs(self):
     """Return the command-line inputs of the ``filter_vep`` script.

     Each entry is a ``ToolInput`` that maps a tag onto one long-form
     option (``--input_file``, ``--filter`` ...). The short option
     aliases are quoted in the attached ``InputDocumentation`` text.

     Returns:
         A list of ``ToolInput`` declarations, in the order they are
         declared below.
     """
     return [
         ToolInput(
             tag="input_file",
             input_type=File(optional=True),
             prefix="--input_file",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-i) Specify the input file (i.e. the VEP results file). If no input file is specified, the "
                 "script will attempt to read from STDIN. Input may be gzipped - to force the script to read "
                 "a file as gzipped, use --gz"
             ),
         ),
         ToolInput(
             tag="format",
             input_type=String(optional=True),
             prefix="--format",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="[vcf|tab] Specify input file format (tab for any tab-delimited format,"
                 " including default VEP output format)"
             ),
         ),
         ToolInput(
             tag="outputFilename",
             input_type=Filename(extension=".txt"),
             prefix="--output_file",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-o) Specify the output file to write to. If no output file is specified, "
                 "the script will write to STDOUT"
             ),
         ),
         ToolInput(
             tag="force_overwrite",
             input_type=Boolean(optional=True),
             prefix="--force_overwrite",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Force the script to overwrite the output file if it already exists"
             ),
         ),
         ToolInput(
             tag="filter",
             input_type=Array(String, optional=True),
             prefix="--filter",
             separate_value_from_prefix=True,
             # One "--filter <expr>" pair is emitted per array element.
             prefix_applies_to_all_elements=True,
             doc=InputDocumentation(
                 doc="(-f) Add filter. Multiple --filter flags may be used, and are "
                 "treated as logical ANDs, i.e. all filters must pass for a line to be printed"
             ),
         ),
         ToolInput(
             tag="list",
             # --list is a bare flag (it lists the allowed fields and takes
             # no value), so it is modelled as a Boolean like --count and
             # --help rather than as an array of strings.
             input_type=Boolean(optional=True),
             prefix="--list",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-l) List allowed fields from the input file"
             ),
         ),
         ToolInput(
             tag="count",
             input_type=Boolean(optional=True),
             prefix="--count",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-c) Print only a count of matched lines"
             ),
         ),
         ToolInput(
             tag="only_matched",
             input_type=Boolean(optional=True),
             prefix="--only_matched",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="In VCF files, the CSQ field that contains the consequence data will often "
                 "contain more than one 'block' of consequence data, where each block corresponds "
                 "to a variant/feature overlap. Using --only_matched will remove blocks that do not "
                 "pass the filters. By default, the script prints out the "
                 "entire VCF line if any of the blocks pass the filters."
             ),
         ),
         ToolInput(
             tag="vcf_info_field",
             input_type=String(optional=True),
             prefix="--vcf_info_field",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="With VCF input files, by default filter_vep expects to find VEP annotations encoded in the"
                 " CSQ INFO key; VEP itself can be configured to write to a different key (with the equivalent "
                 "--vcf_info_field flag). Use this flag to change the INFO key VEP expects to decode."
             ),
         ),
         ToolInput(
             tag="ontology",
             input_type=Boolean(optional=True),
             prefix="--ontology",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-y) Use Sequence Ontology to match consequence terms. Use with operator 'is' to match "
                 "against all child terms of your value. e.g. 'Consequence is coding_sequence_variant' "
                 "will match missense_variant, synonymous_variant etc. Requires database connection; "
                 "defaults to connecting to ensembldb.ensembl.org. Use --host, --port, --user, --password, "
                 "--version as per vep to change connection parameters."
             ),
         ),
         ToolInput(
             tag="help",
             input_type=Boolean(optional=True),
             prefix="--help",
             separate_value_from_prefix=True,
             doc=InputDocumentation(doc="-h Print usage message and exit"),
         ),
     ]
Ejemplo n.º 14
0
 def inputs(self):
     return [
         ToolInput(
             tag="outputFilename",
             input_type=Filename(
                 prefix=InputSelector("variant",
                                      remove_file_extension=True),
                 suffix=".scored",
                 extension=".vcf.gz",
             ),
             prefix="--output",
             separate_value_from_prefix=True,
             doc=InputDocumentation(doc="(-O) Output file Required."),
         ),
         ToolInput(
             tag="reference",
             input_type=FastaWithDict(optional=True),
             prefix="--reference",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-R) Reference sequence file Required."),
         ),
         ToolInput(
             tag="variant",
             input_type=Vcf(optional=True),
             prefix="--variant",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-V) A VCF file containing variants Required."),
         ),
         ToolInput(
             tag="addOutputSamProgramRecord",
             input_type=Boolean(optional=True),
             prefix="--add-output-sam-program-record",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-add-output-sam-program-record)  If true, adds a PG tag to created SAM/BAM/CRAM files.  Default value: true. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="addOutputVcfCommandLine",
             input_type=Boolean(optional=True),
             prefix="--add-output-vcf-command-line",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-add-output-vcf-command-line)  If true, adds a command line header line to created VCF files.  Default value: true. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="architecture",
             input_type=JsonFile(optional=True),
             prefix="--architecture",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-architecture)  Neural Net architecture configuration json file  Default value: null. "
             ),
         ),
         ToolInput(
             tag="arguments_file",
             input_type=Array(File, optional=True),
             prefix="--arguments_file",
             separate_value_from_prefix=True,
             prefix_applies_to_all_elements=True,
             doc=InputDocumentation(
                 doc=
                 "read one or more arguments files and add them to the command line This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         ToolInput(
             tag="cloudIndexPrefetchBuffer",
             input_type=Int(optional=True),
             prefix="--cloud-index-prefetch-buffer",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-CIPB)  Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer if unset.  Default value: -1. "
             ),
         ),
         ToolInput(
             tag="cloudPrefetchBuffer",
             input_type=Int(optional=True),
             prefix="--cloud-prefetch-buffer",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-CPB)  Size of the cloud-only prefetch buffer (in MB; 0 to disable).  Default value: 40. "
             ),
         ),
         ToolInput(
             tag="createOutputBamIndex",
             input_type=Boolean(optional=True),
             prefix="--create-output-bam-index",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-OBI)  If true, create a BAM/CRAM index when writing a coordinate-sorted BAM/CRAM file.  Default value: true. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="createOutputBamMd5",
             input_type=Boolean(optional=True),
             prefix="--create-output-bam-md5",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-OBM)  If true, create a MD5 digest for any BAM/SAM/CRAM file created  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="createOutputVariantIndex",
             input_type=Boolean(optional=True),
             prefix="--create-output-variant-index",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-OVI)  If true, create a VCF index when writing a coordinate-sorted VCF file.  Default value: true. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="createOutputVariantMd5",
             input_type=Boolean(optional=True),
             prefix="--create-output-variant-md5",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-OVM)  If true, create a a MD5 digest any VCF file created.  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="disableBamIndexCaching",
             input_type=Boolean(optional=True),
             prefix="--disable-bam-index-caching",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-DBIC)  If true, don't cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified.  Caching is automatically disabled if there are no intervals specified.  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="disableReadFilter",
             input_type=String(optional=True),
             prefix="--disable-read-filter",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-DF)  Read filters to be disabled before analysis  This argument may be specified 0 or more times. Default value: null. Possible Values: {ReadGroupBlackListReadFilter, WellformedReadFilter}"
             ),
         ),
         ToolInput(
             tag="disableSequenceDictionaryValidation",
             input_type=Boolean(optional=True),
             prefix="--disable-sequence-dictionary-validation",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-disable-sequence-dictionary-validation)  If specified, do not check the sequence dictionaries from our inputs for compatibility. Use at your own risk!  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="excludeIntervals",
             input_type=Boolean(optional=True),
             prefix="--exclude-intervals",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-XL) This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         ToolInput(
             tag="filterSymbolicAndSv",
             input_type=Boolean(optional=True),
             prefix="--filter-symbolic-and-sv",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-filter-symbolic-and-sv)  If set will filter symbolic and and structural variants from the input VCF  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="gatkConfigFile",
             input_type=String(optional=True),
             prefix="--gatk-config-file",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "A configuration file to use with the GATK. Default value: null."
             ),
         ),
         ToolInput(
             tag="gcsMaxRetries",
             input_type=Int(optional=True),
             prefix="--gcs-max-retries",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-gcs-retries)  If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection  Default value: 20. "
             ),
         ),
         ToolInput(
             tag="gcsProjectForRequesterPays",
             input_type=String(optional=True),
             prefix="--gcs-project-for-requester-pays",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " Project to bill when accessing 'requester pays' buckets. If unset, these buckets cannot be accessed.  Default value: . "
             ),
         ),
         ToolInput(
             tag="help",
             input_type=Boolean(optional=True),
             prefix="--help",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-h) display the help message Default value: false. Possible values: {true, false}"
             ),
         ),
         ToolInput(
             tag="inp",
             input_type=String(optional=True),
             prefix="--input",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-I) BAM/SAM/CRAM file containing reads This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         ToolInput(
             tag="intervalExclusionPadding",
             input_type=Int(optional=True),
             prefix="--interval-exclusion-padding",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-ixp)  Amount of padding (in bp) to add to each interval you are excluding.  Default value: 0. "
             ),
         ),
         ToolInput(
             tag="intervalMergingRule",
             input_type=Boolean(optional=True),
             prefix="--interval-merging-rule",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-imr)  Interval merging rule for abutting intervals  Default value: ALL. Possible values: {ALL, OVERLAPPING_ONLY} "
             ),
         ),
         ToolInput(
             tag="intervalPadding",
             input_type=Boolean(optional=True),
             prefix="--interval-padding",
             separate_value_from_prefix=True,
             doc=InputDocumentation(doc="(-ip) Default value: 0."),
         ),
         ToolInput(
             tag="intervalSetRule",
             input_type=Boolean(optional=True),
             prefix="--interval-set-rule",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-isr)  Set merging approach to use for combining interval inputs  Default value: UNION. Possible values: {UNION, INTERSECTION} "
             ),
         ),
         ToolInput(
             tag="intervals",
             input_type=String(optional=True),
             prefix="--intervals",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-L) One or more genomic intervals over which to operate This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         ToolInput(
             tag="lenient",
             input_type=Boolean(optional=True),
             prefix="--lenient",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-LE) Lenient processing of VCF files Default value: false. Possible values: {true, false}"
             ),
         ),
         ToolInput(
             tag="quiet",
             input_type=Boolean(optional=True),
             prefix="--QUIET",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "Whether to suppress job-summary info on System.err. Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="readFilter",
             input_type=String(optional=True),
             prefix="--read-filter",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-RF) Read filters to be applied before analysis This argument may be specified 0 or more times. Default value: null. Possible Values: {AlignmentAgreesWithHeaderReadFilter, AllowAllReadsReadFilter, AmbiguousBaseReadFilter, CigarContainsNoNOperator, FirstOfPairReadFilter, FragmentLengthReadFilter, GoodCigarReadFilter, HasReadGroupReadFilter, IntervalOverlapReadFilter, LibraryReadFilter, MappedReadFilter, MappingQualityAvailableReadFilter, MappingQualityNotZeroReadFilter, MappingQualityReadFilter, MatchingBasesAndQualsReadFilter, MateDifferentStrandReadFilter, MateOnSameContigOrNoMappedMateReadFilter, MateUnmappedAndUnmappedReadFilter, MetricsReadFilter, NonChimericOriginalAlignmentReadFilter, NonZeroFragmentLengthReadFilter, NonZeroReferenceLengthAlignmentReadFilter, NotDuplicateReadFilter, NotOpticalDuplicateReadFilter, NotSecondaryAlignmentReadFilter, NotSupplementaryAlignmentReadFilter, OverclippedReadFilter, PairedReadFilter, PassesVendorQualityCheckReadFilter, PlatformReadFilter, PlatformUnitReadFilter, PrimaryLineReadFilter, ProperlyPairedReadFilter, ReadGroupBlackListReadFilter, ReadGroupReadFilter, ReadLengthEqualsCigarLengthReadFilter, ReadLengthReadFilter, ReadNameReadFilter, ReadStrandFilter, SampleReadFilter, SecondOfPairReadFilter, SeqIsStoredReadFilter, SoftClippedReadFilter, ValidAlignmentEndReadFilter, ValidAlignmentStartReadFilter, WellformedReadFilter}"
             ),
         ),
         ToolInput(
             tag="readIndex",
             input_type=String(optional=True),
             prefix="--read-index",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-read-index)  Indices to use for the read inputs. If specified, an index must be provided for every read input and in the same order as the read inputs. If this argument is not specified, the path to the index for each input will be inferred automatically.  This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         ToolInput(
             tag="readLimit",
             input_type=Int(optional=True),
             prefix="--read-limit",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-read-limit)  Maximum number of reads to encode in a tensor, for 2D models only.  Default value: 128. "
             ),
         ),
         ToolInput(
             tag="readValidationStringency",
             input_type=Boolean(optional=True),
             prefix="--read-validation-stringency",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-VS)  Validation stringency for all SAM/BAM/CRAM/SRA files read by this program.  The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded.  Default value: SILENT. Possible values: {STRICT, LENIENT, SILENT} "
             ),
         ),
         ToolInput(
             tag="secondsBetweenProgressUpdates",
             input_type=Double(optional=True),
             prefix="--seconds-between-progress-updates",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-seconds-between-progress-updates)  Output traversal statistics every time this many seconds elapse  Default value: 10.0. "
             ),
         ),
         ToolInput(
             tag="sequenceDictionary",
             input_type=String(optional=True),
             prefix="--sequence-dictionary",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-sequence-dictionary)  Use the given sequence dictionary as the master/canonical sequence dictionary.  Must be a .dict file.  Default value: null. "
             ),
         ),
         ToolInput(
             tag="sitesOnlyVcfOutput",
             input_type=Boolean(optional=True),
             prefix="--sites-only-vcf-output",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " If true, don't emit genotype fields when writing vcf file output.  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="tensorType",
             input_type=Boolean(optional=True),
             prefix="--tensor-type",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-tensor-type)  Name of the tensors to generate, reference for 1D reference tensors and read_tensor for 2D tensors.  Default value: reference. Possible values: { reference ( 1 Hot encoding of a reference sequence. ) read_tensor (Read tensor are 3D tensors spanning aligned reads, sites and channels. The maximum number of reads is a hyper-parameter typically set to 128. There are 15 channels in the read tensor. They correspond to the reference sequence data (4), read sequence data (4), insertions and deletions (2) read flags (4) and mapping quality (1).) } "
             ),
         ),
         ToolInput(
             tag="tmpDir",
             input_type=Boolean(optional=True),
             prefix="--tmp-dir",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Temp directory to use. Default value: null."),
         ),
         ToolInput(
             tag="useJdkDeflater",
             input_type=Boolean(optional=True),
             prefix="--use-jdk-deflater",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-jdk-deflater)  Whether to use the JdkDeflater (as opposed to IntelDeflater)  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="useJdkInflater",
             input_type=Boolean(optional=True),
             prefix="--use-jdk-inflater",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-jdk-inflater)  Whether to use the JdkInflater (as opposed to IntelInflater)  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="verbosity",
             input_type=Boolean(optional=True),
             prefix="--verbosity",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-verbosity)  Control verbosity of logging.  Default value: INFO. Possible values: {ERROR, WARNING, INFO, DEBUG} "
             ),
         ),
         ToolInput(
             tag="version",
             input_type=Boolean(optional=True),
             prefix="--version",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "display the version number for this tool Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="weights",
             input_type=String(optional=True),
             prefix="--weights",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-weights) Keras model HD5 file with neural net weights. Default value: null."
             ),
         ),
         ToolInput(
             tag="windowSize",
             input_type=Int(optional=True),
             prefix="--window-size",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-window-size)  Neural Net input window size  Default value: 128. "
             ),
         ),
         ToolInput(
             tag="disableAvxCheck",
             input_type=Boolean(optional=True),
             prefix="--disable-avx-check",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-disable-avx-check)  If set, no check will be made for AVX support.  Use only if you have installed a pre-1.6 TensorFlow build.   Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="disableToolDefaultReadFilters",
             input_type=Boolean(optional=True),
             prefix="--disable-tool-default-read-filters",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-disable-tool-default-read-filters)  Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="inferenceBatchSize",
             input_type=Int(optional=True),
             prefix="--inference-batch-size",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-inference-batch-size)  Size of batches for python to do inference on.  Default value: 256. "
             ),
         ),
         ToolInput(
             tag="infoAnnotationKeys",
             input_type=String(optional=True),
             prefix="--info-annotation-keys",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-info-annotation-keys)  The VCF info fields to send to python.  This should only be changed if a new model has been trained which expects the annotations provided here.  This argument may be specified 0 or more times. Default value: [MQ, DP, SOR, FS, QD, MQRankSum, ReadPosRankSum]. "
             ),
         ),
         ToolInput(
             tag="interOpThreads",
             input_type=Int(optional=True),
             prefix="--inter-op-threads",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-inter-op-threads)  Number of inter-op parallelism threads to use for Tensorflow  Default value: 0. "
             ),
         ),
         ToolInput(
             tag="intraOpThreads",
             input_type=Int(optional=True),
             prefix="--intra-op-threads",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-intra-op-threads)  Number of intra-op parallelism threads to use for Tensorflow  Default value: 0. "
             ),
         ),
         ToolInput(
             tag="outputTensorDir",
             input_type=String(optional=True),
             prefix="--output-tensor-dir",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-output-tensor-dir)  Optional directory where tensors can be saved for debugging or visualization.  Default value: . "
             ),
         ),
         ToolInput(
             tag="showhidden",
             input_type=Boolean(optional=True),
             prefix="--showHidden",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-showHidden)  display hidden arguments  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="transferBatchSize",
             input_type=Int(optional=True),
             prefix="--transfer-batch-size",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-transfer-batch-size)  Size of data to queue for python streaming.  Default value: 512. "
             ),
         ),
         ToolInput(
             tag="ambigFilterBases",
             input_type=Int(optional=True),
             prefix="--ambig-filter-bases",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "Threshold number of ambiguous bases. If null, uses threshold fraction; otherwise, overrides threshold fraction.  Default value: null.  Cannot be used in conjuction with argument(s) maxAmbiguousBaseFraction"
             ),
         ),
         ToolInput(
             tag="ambigFilterFrac",
             input_type=Double(optional=True),
             prefix="--ambig-filter-frac",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "Threshold fraction of ambiguous bases Default value: 0.05. Cannot be used in conjuction with argument(s) maxAmbiguousBases"
             ),
         ),
         ToolInput(
             tag="maxFragmentLength",
             input_type=Boolean(optional=True),
             prefix="--max-fragment-length",
             separate_value_from_prefix=True,
             doc=InputDocumentation(doc="Default value: 1000000."),
         ),
         ToolInput(
             tag="minFragmentLength",
             input_type=Boolean(optional=True),
             prefix="--min-fragment-length",
             separate_value_from_prefix=True,
             doc=InputDocumentation(doc="Default value: 0."),
         ),
         ToolInput(
             tag="keepIntervals",
             input_type=String(optional=True),
             prefix="--keep-intervals",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "One or more genomic intervals to keep This argument must be specified at least once. Required. "
             ),
         ),
         ToolInput(
             tag="library",
             input_type=String(optional=True),
             prefix="--library",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-library) Name of the library to keep This argument must be specified at least once. Required."
             ),
         ),
         ToolInput(
             tag="maximumMappingQuality",
             input_type=Int(optional=True),
             prefix="--maximum-mapping-quality",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " Maximum mapping quality to keep (inclusive)  Default value: null. "
             ),
         ),
         ToolInput(
             tag="minimumMappingQuality",
             input_type=Int(optional=True),
             prefix="--minimum-mapping-quality",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " Minimum mapping quality to keep (inclusive)  Default value: 10. "
             ),
         ),
         ToolInput(
             tag="dontRequireSoftClipsBothEnds",
             input_type=Boolean(optional=True),
             prefix="--dont-require-soft-clips-both-ends",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " Allow a read to be filtered out based on having only 1 soft-clipped block. By default, both ends must have a soft-clipped block, setting this flag requires only 1 soft-clipped block  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="filterTooShort",
             input_type=Int(optional=True),
             prefix="--filter-too-short",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Minimum number of aligned bases Default value: 30."),
         ),
         ToolInput(
             tag="platformFilterName",
             input_type=Boolean(optional=True),
             prefix="--platform-filter-name",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "This argument must be specified at least once. Required."
             ),
         ),
         ToolInput(
             tag="blackListedLanes",
             input_type=String(optional=True),
             prefix="--black-listed-lanes",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "Platform unit (PU) to filter out This argument must be specified at least once. Required."
             ),
         ),
         ToolInput(
             tag="readGroupBlackList",
             input_type=Boolean(optional=True),
             prefix="--read-group-black-list",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "This argument may be specified 0 or more times. Default value: [ID:ArtificialHaplotypeRG, ID:ArtificialHaplotype]. "
             ),
         ),
         ToolInput(
             tag="keepReadGroup",
             input_type=String(optional=True),
             prefix="--keep-read-group",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="The name of the read group to keep Required."),
         ),
         ToolInput(
             tag="maxReadLength",
             input_type=Int(optional=True),
             prefix="--max-read-length",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "Keep only reads with length at most equal to the specified value Required."
             ),
         ),
         ToolInput(
             tag="minReadLength",
             input_type=Int(optional=True),
             prefix="--min-read-length",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "Keep only reads with length at least equal to the specified value Default value: 1."
             ),
         ),
         ToolInput(
             tag="readName",
             input_type=String(optional=True),
             prefix="--read-name",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Keep only reads with this read name Required."),
         ),
         ToolInput(
             tag="keepReverseStrandOnly",
             input_type=Boolean(optional=True),
             prefix="--keep-reverse-strand-only",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " Keep only reads on the reverse strand  Required. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="sample",
             input_type=String(optional=True),
             prefix="--sample",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-sample) The name of the sample(s) to keep, filtering out all others This argument must be specified at least once. Required. "
             ),
         ),
         ToolInput(
             tag="invertSoftClipRatioFilter",
             input_type=Boolean(optional=True),
             prefix="--invert-soft-clip-ratio-filter",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="softClippedLeadingTrailingRatio",
             input_type=Double(optional=True),
             prefix="--soft-clipped-leading-trailing-ratio",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " Threshold ratio of soft clipped bases (leading / trailing the cigar string) to total bases in read for read to be filtered.  Default value: null.  Cannot be used in conjuction with argument(s) minimumSoftClippedRatio"
             ),
         ),
         ToolInput(
             tag="softClippedRatioThreshold",
             input_type=Double(optional=True),
             prefix="--soft-clipped-ratio-threshold",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in read for read to be filtered.  Default value: null.  Cannot be used in conjuction with argument(s) minimumLeadingTrailingSoftClippedRatio"
             ),
         ),
     ]
Ejemplo n.º 15
0
 def inputs(self):
     return [
         *super().inputs(),
         ToolInput(
             tag="inp",
             input_type=Bam(),
             prefix="--INPUT",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-I) Input file (BAM or SAM or a GA4GH url). Required."
             ),
         ),
         ToolInput(
             tag="outputFilename",
             input_type=Filename(
                 prefix=InputSelector("inp", remove_file_extension=True),
                 extension=".bam",
             ),
             prefix="--OUTPUT",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-O) Output file (BAM or SAM). Required."),
         ),
         ToolInput(
             tag="rglb",
             input_type=String(),
             prefix="--RGLB",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-LB) Read-Group library Required."),
         ),
         ToolInput(
             tag="rgpl",
             input_type=String(),
             prefix="--RGPL",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-PL) Read-Group platform (e.g. ILLUMINA, SOLID) Required."
             ),
         ),
         ToolInput(
             tag="rgpu",
             input_type=String(),
             prefix="--RGPU",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-PU) Read-Group platform unit (eg. run barcode) Required."
             ),
         ),
         ToolInput(
             tag="rgsm",
             input_type=String(),
             prefix="--RGSM",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-SM) Read-Group sample name Required."),
         ),
         ToolInput(
             tag="arguments_file",
             input_type=File(optional=True),
             prefix="--arguments_file",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "read one or more arguments files and add them to the command line This argument may be "
                 "specified 0 or more times. Default value: null. "),
         ),
         # ToolInput(
         #     tag="compression_level",
         #     input_type=Int(optional=True),
         #     prefix="--COMPRESSION_LEVEL",
         #     separate_value_from_prefix=True,
         #     doc=InputDocumentation(
         #         doc="Compression level for all compressed files created (e.g. BAM and VCF). Default value: 2."
         #     ),
         # ),
         ToolInput(
             tag="create_index",
             input_type=Boolean(optional=True),
             prefix="--CREATE_INDEX",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "Whether to create a BAM index when writing a coordinate-sorted BAM file. "
                 "Default value: false. Possible values: {true, false} "),
         ),
         ToolInput(
             tag="create_md5_file",
             input_type=Boolean(optional=True),
             prefix="--CREATE_MD5_FILE",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "Whether to create an MD5 digest for any BAM or FASTQ files created. "
                 "Default value: false. Possible values: {true, false} "),
         ),
         ToolInput(
             tag="ga4gh_client_secrets",
             input_type=Boolean(optional=True),
             prefix="--GA4GH_CLIENT_SECRETS",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Default value: client_secrets.json."),
         ),
         ToolInput(
             tag="help",
             input_type=Boolean(optional=True),
             prefix="--help",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-h) display the help message Default value: false. Possible values: {true, false}"
             ),
         ),
         ToolInput(
             tag="max_records_in_ram",
             input_type=Int(optional=True),
             prefix="--MAX_RECORDS_IN_RAM",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "When writing files that need to be sorted, this will specify the number of records "
                 "stored in RAM before spilling to disk. Increasing this number reduces the number of file "
                 "handles needed to sort the file, and increases the amount of RAM needed.  "
                 "Default value: 500000. "),
         ),
         ToolInput(
             tag="quiet",
             input_type=Boolean(optional=True),
             prefix="--QUIET",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Whether to suppress job-summary info on System.err. "
                 "Default value: false. Possible values: {true, false} "),
         ),
         ToolInput(
             tag="reference_sequence",
             input_type=File(optional=True),
             prefix="--REFERENCE_SEQUENCE",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-R) Reference sequence file. Default value: null."),
         ),
         ToolInput(
             tag="rgcn",
             input_type=String(optional=True),
             prefix="--RGCN",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-CN) Read-Group sequencing center name Default value: null."
             ),
         ),
         ToolInput(
             tag="rgds",
             input_type=String(optional=True),
             prefix="--RGDS",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-DS) Read-Group description Default value: null."),
         ),
         ToolInput(
             tag="rgdt",
             input_type=Boolean(optional=True),
             prefix="--RGDT",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-DT) Read-Group run date Default value: null."),
         ),
         ToolInput(
             tag="rgfo",
             input_type=String(optional=True),
             prefix="--RGFO",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-FO) Read-Group flow order Default value: null."),
         ),
         ToolInput(
             tag="rgid",
             input_type=String(optional=True),
             prefix="--RGID",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-ID) Read-Group ID Default value: 1."),
         ),
         ToolInput(
             tag="rgks",
             input_type=String(optional=True),
             prefix="--RGKS",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-KS) Read-Group key sequence Default value: null."),
         ),
         ToolInput(
             tag="rgpg",
             input_type=String(optional=True),
             prefix="--RGPG",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-PG) Read-Group program group Default value: null."),
         ),
         ToolInput(
             tag="rgpi",
             input_type=Int(optional=True),
             prefix="--RGPI",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-PI) Read-Group predicted insert size Default value: null."
             ),
         ),
         ToolInput(
             tag="rgpm",
             input_type=String(optional=True),
             prefix="--RGPM",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="(-PM) Read-Group platform model Default value: null."
             ),
         ),
         ToolInput(
             tag="sort_order",
             input_type=String(optional=True),
             prefix="--SORT_ORDER",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-SO) Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT. "
                 "Default value: null. Possible values: {unsorted, queryname, coordinate, duplicate, unknown} "
             ),
         ),
         ToolInput(
             tag="tmp_dir",
             input_type=File(optional=True),
             prefix="--TMP_DIR",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "One or more directories with space available to be used by this program for temporary storage "
                 "of working files  This argument may be specified 0 or more times. Default value: null. "
             ),
         ),
         ToolInput(
             tag="use_jdk_deflater",
             input_type=Boolean(optional=True),
             prefix="--USE_JDK_DEFLATER",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-use_jdk_deflater)  Use the JDK Deflater instead of the Intel Deflater for writing "
                 "compressed output  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="use_jdk_inflater",
             input_type=Boolean(optional=True),
             prefix="--USE_JDK_INFLATER",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-use_jdk_inflater)  Use the JDK Inflater instead of the Intel Inflater for reading "
                 "compressed input  Default value: false. Possible values: {true, false} "
             ),
         ),
         ToolInput(
             tag="validation_stringency",
             input_type=String(optional=True),
             prefix="--VALIDATION_STRINGENCY",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 " Validation stringency for all SAM files read by this program.  Setting stringency to "
                 "SILENT can improve performance when processing a BAM file in which variable-length data "
                 "(read, qualities, tags) do not otherwise need to be decoded.  Default value: STRICT. "
                 "Possible values: {STRICT, LENIENT, SILENT} "),
         ),
         ToolInput(
             tag="verbosity",
             input_type=Boolean(optional=True),
             prefix="--VERBOSITY",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc="Control verbosity of logging. Default value: INFO. "
                 "Possible values: {ERROR, WARNING, INFO, DEBUG} "),
         ),
         ToolInput(
             tag="version",
             input_type=Boolean(optional=True),
             prefix="--version",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "display the version number for this tool Default value: false. Possible values: {true, false}"
             ),
         ),
         ToolInput(
             tag="showhidden",
             input_type=Boolean(optional=True),
             prefix="--showHidden",
             separate_value_from_prefix=True,
             doc=InputDocumentation(
                 doc=
                 "(-showHidden)  display hidden arguments  Default value: false. Possible values: {true, false}"
             ),
         ),
     ]
Ejemplo n.º 16
0
 def inputs(self):
     """Return the list of ToolInput definitions for this tool.

     The first entry is the VCF at position 1. Every remaining entry is a
     prefixed option created with ``separate_value_from_prefix=True`` and an
     ``InputDocumentation`` wrapping its help text.
     """

     def option(tag, input_type, prefix, description):
         # All prefixed options share one shape; only these four values vary.
         return ToolInput(
             tag=tag,
             input_type=input_type,
             prefix=prefix,
             separate_value_from_prefix=True,
             doc=InputDocumentation(doc=description),
         )

     return [
         ToolInput("vcf", Vcf, position=1, doc="VCF to filter"),
         option(
             "info_filter",
             String(optional=True),
             "--info-filter",
             "(-f) specifies a filter to apply to the info fields of records, "
             "removes alleles which do not pass the filter",
         ),
         option(
             "genotype_filter",
             String(optional=True),
             "--genotype-filter",
             "(-g) specifies a filter to apply to the genotype fields of records",
         ),
         option(
             "keep_info",
             Boolean(optional=True),
             "--keep-info",
             "(-k) used in conjunction with '-g', keeps variant info, but removes genotype",
         ),
         option(
             "filter_sites",
             Boolean(optional=True),
             "--filter-sites",
             "(-s) filter entire records, not just alleles",
         ),
         option(
             "tag_pass",
             String(optional=True),
             "--tag-pass",
             "(-t) tag vcf records as positively filtered with this tag, print all records",
         ),
         option(
             "tag_fail",
             String(optional=True),
             "--tag-fail",
             "(-F) tag vcf records as negatively filtered with this tag, print all records",
         ),
         option(
             "append_filter",
             Boolean(optional=True),
             "--append-filter",
             "(-A) append the existing filter tag, don't just replace it",
         ),
         option(
             "allele_tag",
             String(optional=True),
             "--allele-tag",
             "(-a) apply -t on a per-allele basis. adds or sets the corresponding INFO field tag",
         ),
         option(
             "invert",
             Boolean(optional=True),
             "--invert",
             "(-v) inverts the filter, e.g. grep -v",
         ),
         option(
             "use_logical_or",
             Boolean(optional=True),
             "--or",
             "(-o) use logical OR instead of AND to combine filters",
         ),
         option(
             "region",
             Array(BedTabix, optional=True),
             "--region",
             "(-r) specify a region on which to target the filtering, requires a BGZF compressed file "
             "which has been indexed with tabix.  any number of regions may be specified.",
         ),
     ]
Ejemplo n.º 17
0
    def constructor(self):
        self.input(
            "normal_inputs",
            Array(FastqGzPair),
            doc=InputDocumentation(
                "An array of NORMAL FastqGz pairs. These are aligned separately and merged to create higher depth coverages from multiple sets of reads",
                quality=InputQualityType.user,
                example='["normal_R1.fastq.gz", "normal_R2.fastq.gz"]',
            ),
        )
        self.input(
            "tumor_inputs",
            Array(FastqGzPair),
            doc=InputDocumentation(
                "An array of TUMOR FastqGz pairs. These are aligned separately and merged to create higher depth coverages from multiple sets of reads",
                quality=InputQualityType.user,
                example='["tumor_R1.fastq.gz", "tumor_R2.fastq.gz"]',
            ),
        )

        self.input(
            "normal_name",
            String(),
            doc=InputDocumentation(
                "Sample name for the NORMAL sample from which to generate the readGroupHeaderLine for BwaMem",
                quality=InputQualityType.user,
                example="NA24385_normal",
            ),
        )
        self.input(
            "tumor_name",
            String(),
            doc=InputDocumentation(
                "Sample name for the TUMOR sample from which to generate the readGroupHeaderLine for BwaMem",
                quality=InputQualityType.user,
                example="NA24385_tumor",
            ),
        )

        self.input(
            "cutadapt_adapters",
            File(optional=True),
            doc=InputDocumentation(
                "Specifies a containment list for cutadapt, which contains a list of sequences to determine valid overrepresented sequences from "
                "the FastQC report to trim with Cuatadapt. The file must contain sets of named adapters in the form: "
                "``name[tab]sequence``. Lines prefixed with a hash will be ignored.",
                quality=InputQualityType.static,
                example=
                "https://github.com/csf-ngs/fastqc/blob/master/Contaminants/contaminant_list.txt",
            ),
        )
        self.input(
            "gatk_intervals",
            Array(Bed),
            doc=InputDocumentation(
                "List of intervals over which to split the GATK variant calling",
                quality=InputQualityType.static,
                example="BRCA1.bed",
            ),
        )

        self.input(
            "gridss_blacklist",
            Bed,
            doc=InputDocumentation(
                "BED file containing regions to ignore.",
                quality=InputQualityType.static,
                example="https://github.com/PapenfussLab/gridss#blacklist",
            ),
        )
        self.input(
            "vardict_intervals",
            Array(Bed),
            doc=InputDocumentation(
                "List of intervals over which to split the VarDict variant calling",
                quality=InputQualityType.static,
                example="BRCA1.bed",
            ),
        )
        self.input(
            "strelka_intervals",
            BedTabix,
            doc=InputDocumentation(
                "An interval for which to restrict the analysis to.",
                quality=InputQualityType.static,
                example="BRCA1.bed.gz",
            ),
        )

        self.input(
            "allele_freq_threshold",
            Float,
            default=0.05,
            doc=InputDocumentation(
                "The threshold for VarDict's allele frequency, default: 0.05 or 5%",
                quality=InputQualityType.configuration,
                example=None,
            ),
        )

        self.input(
            "reference",
            FastaWithDict,
            doc=InputDocumentation(
                """\
The reference genome from which to align the reads. This requires a number indexes (can be generated \
with the 'IndexFasta' pipeline This pipeline has been tested using the HG38 reference set.

This pipeline expects the assembly references to be as they appear in the GCP example:

- (".fai", ".amb", ".ann", ".bwt", ".pac", ".sa", "^.dict").""",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.fasta",
            ),
        )

        self.input(
            "snps_dbsnp",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "(WARNING: The file available from the genomics-public-data resource on Google Cloud Storage is NOT compressed and indexed. This will need to be completed prior to starting the pipeline.\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.gz",
            ),
        )
        self.input(
            "snps_1000gp",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz",
            ),
        )
        self.input(
            "known_indels",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz",
            ),
        )
        self.input(
            "mills_indels",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
            ),
        )

        self.step(
            "normal",
            self.process_subpipeline(
                reads=self.normal_inputs,
                sample_name=self.normal_name,
                reference=self.reference,
                cutadapt_adapters=self.cutadapt_adapters,
            ),
        )
        self.step(
            "tumor",
            self.process_subpipeline(
                reads=self.tumor_inputs,
                sample_name=self.tumor_name,
                reference=self.reference,
                cutadapt_adapters=self.cutadapt_adapters,
            ),
        )

        self.step(
            "vc_gatk",
            GatkSomaticVariantCaller_4_1_3(
                normal_bam=self.tumor.out,
                tumor_bam=self.normal.out,
                normal_name=self.normal_name,
                tumor_name=self.tumor_name,
                intervals=self.gatk_intervals,
                reference=self.reference,
                snps_dbsnp=self.snps_dbsnp,
                snps_1000gp=self.snps_1000gp,
                known_indels=self.known_indels,
                mills_indels=self.mills_indels,
            ),
            scatter="intervals",
        )

        self.step("vc_gatk_merge", Gatk4GatherVcfs_4_1_3(vcfs=self.vc_gatk))

        self.step(
            "vc_strelka",
            IlluminaSomaticVariantCaller(
                normal_bam=self.normal.out,
                tumor_bam=self.tumor.out,
                intervals=self.strelka_intervals,
                reference=self.reference,
            ),
        )

        self.step(
            "vc_gridss",
            Gridss_2_6_2(
                bams=[self.normal.out, self.tumor.out],
                reference=self.reference,
                blacklist=self.gridss_blacklist,
            ),
        )

        self.step(
            "generate_vardict_headerlines",
            GenerateVardictHeaderLines(reference=self.reference),
        )
        self.step(
            "vc_vardict",
            VardictSomaticVariantCaller(
                normal_bam=self.tumor.out,
                tumor_bam=self.normal.out,
                normal_name=self.normal_name,
                tumor_name=self.tumor_name,
                header_lines=self.generate_vardict_headerlines.out,
                intervals=self.vardict_intervals,
                reference=self.reference,
                allele_freq_threshold=self.allele_freq_threshold,
            ),
            scatter="intervals",
        )

        self.step("vc_vardict_merge",
                  Gatk4GatherVcfs_4_1_3(vcfs=self.vc_vardict.out))

        self.step(
            "combine_variants",
            CombineVariants_0_0_4(
                normal=self.normal_name,
                tumor=self.tumor_name,
                vcfs=[
                    self.vc_gatk_merge.out,
                    self.vc_strelka.out,
                    self.vc_vardict_merge.out,
                ],
                type="somatic",
                columns=["AD", "DP", "GT"],
            ),
        )
        self.step("sortCombined",
                  BcfToolsSort_1_9(vcf=self.combine_variants.vcf))

        # Outputs

        self.output(
            "normal_report",
            source=self.normal.reports,
            output_folder="reports",
            doc="A zip file of the NORMAL FastQC quality reports.",
        )
        self.output(
            "tumor_report",
            source=self.tumor.reports,
            output_folder="reports",
            doc="A zip file of the TUMOR FastQC quality reports.",
        )

        self.output(
            "normal_bam",
            source=self.normal.out,
            output_folder="bams",
            output_name=self.normal_name,
            doc="Aligned and indexed NORMAL bam",
        )
        self.output(
            "tumor_bam",
            source=self.tumor.out,
            output_folder="bams",
            output_name=self.tumor_name,
            doc="Aligned and indexed TUMOR bam",
        )
        self.output(
            "gridss_assembly",
            source=self.vc_gridss.assembly,
            output_folder="bams",
            doc="Assembly returned by GRIDSS",
        )

        self.output(
            "variants_gatk",
            source=self.vc_gatk_merge.out,
            output_folder="variants",
            doc="Merged variants from the GATK caller",
        )
        self.output(
            "variants_strelka",
            source=self.vc_strelka.out,
            output_folder="variants",
            doc="Variants from the Strelka variant caller",
        )
        self.output(
            "variants_vardict",
            source=self.vc_vardict_merge.out,
            output_folder="variants",
            doc="Merged variants from the VarDict caller",
        )
        self.output(
            "variants_gridss",
            source=self.vc_gridss.out,
            output_folder="variants",
            doc="Variants from the GRIDSS variant caller",
        )
        self.output(
            "variants",
            source=self.combine_variants.vcf,
            output_folder="variants",
            doc="Combined variants from all 3 callers",
        )
Ejemplo n.º 18
0
    def constructor(self):

        self.input(
            "sample_name",
            String,
            doc=InputDocumentation(
                "Sample name from which to generate the readGroupHeaderLine for BwaMem",
                quality=InputQualityType.user,
                example="NA12878",
            ),
        )

        self.input(
            "fastqs",
            Array(FastqGzPair),
            doc=InputDocumentation(
                "An array of FastqGz pairs. These are aligned separately and merged "
                "to create higher depth coverages from multiple sets of reads",
                quality=InputQualityType.user,
                example="[[BRCA1_R1.fastq.gz, BRCA1_R2.fastq.gz]]",
            ),
        )
        self.input(
            "reference",
            FastaWithDict,
            doc=InputDocumentation(
                """\
The reference genome from which to align the reads. This requires a number indexes (can be generated \
with the 'IndexFasta' pipeline This pipeline has been tested using the HG38 reference set.

This pipeline expects the assembly references to be as they appear in the GCP example:

- (".fai", ".amb", ".ann", ".bwt", ".pac", ".sa", "^.dict").""",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.fasta",
            ),
        )
        self.input(
            "cutadapt_adapters",
            File(optional=True),
            doc=InputDocumentation(
                "Specifies a containment list for cutadapt, which contains a list of sequences to determine valid overrepresented sequences from "
                "the FastQC report to trim with Cuatadapt. The file must contain sets of named adapters in the form: "
                "``name[tab]sequence``. Lines prefixed with a hash will be ignored.",
                quality=InputQualityType.static,
                example=
                "https://github.com/csf-ngs/fastqc/blob/master/Contaminants/contaminant_list.txt",
            ),
        )
        self.input(
            "gatk_intervals",
            Array(Bed),
            doc=InputDocumentation(
                "List of intervals over which to split the GATK variant calling",
                quality=InputQualityType.static,
                example="BRCA1.bed",
            ),
        )

        self.input(
            "snps_dbsnp",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "(WARNING: The file available from the genomics-public-data resource on Google Cloud Storage is NOT compressed and indexed. This will need to be completed prior to starting the pipeline.\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.gz",
            ),
        )
        self.input(
            "snps_1000gp",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz",
            ),
        )
        self.input(
            "known_indels",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz",
            ),
        )
        self.input(
            "mills_indels",
            VcfTabix,
            doc=InputDocumentation(
                "From the GATK resource bundle, passed to BaseRecalibrator as ``known_sites``",
                quality=InputQualityType.static,
                example=
                "HG38: https://console.cloud.google.com/storage/browser/genomics-public-data/references/hg38/v0/\n\n"
                "File: gs://genomics-public-data/references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
            ),
        )

        # STEPS

        self.step("fastqc", FastQC_0_11_5(reads=self.fastqs), scatter="reads")

        self.step(
            "getfastqc_adapters",
            ParseFastqcAdaptors(
                fastqc_datafiles=self.fastqc.datafile,
                cutadapt_adaptors_lookup=self.cutadapt_adapters,
            ),
            scatter="fastqc_datafiles",
            # when=NotNullOperator(self.cutadapt_adapters)
        )

        self.step(
            "align_and_sort",
            BwaAligner(
                fastq=self.fastqs,
                reference=self.reference,
                sample_name=self.sample_name,
                sortsam_tmpDir=".",
                cutadapt_adapter=self.getfastqc_adapters,
                cutadapt_removeMiddle3Adapter=self.getfastqc_adapters,
            ),
            scatter=[
                "fastq", "cutadapt_adapter", "cutadapt_removeMiddle3Adapter"
            ],
        )

        self.step(
            "merge_and_mark",
            MergeAndMarkBams_4_1_3(bams=self.align_and_sort,
                                   sampleName=self.sample_name),
        )

        # VARIANT CALLERS

        # GATK
        self.step(
            "vc_gatk",
            GatkGermlineVariantCaller_4_1_3(
                bam=self.merge_and_mark,
                intervals=self.gatk_intervals,
                reference=self.reference,
                snps_dbsnp=self.snps_dbsnp,
                snps_1000gp=self.snps_1000gp,
                known_indels=self.known_indels,
                mills_indels=self.mills_indels,
            ),
            scatter="intervals",
        )

        self.step("vc_gatk_merge", Gatk4GatherVcfs_4_0(vcfs=self.vc_gatk.out))
        # sort

        self.step("sort_combined",
                  BcfToolsSort_1_9(vcf=self.vc_gatk_merge.out))

        self.output(
            "bam",
            source=self.merge_and_mark.out,
            output_folder=["bams", self.sample_name],
            output_name=self.sample_name,
            doc="Aligned and indexed bam.",
        )
        self.output(
            "reports",
            source=self.fastqc.out,
            output_folder=["reports", self.sample_name],
            doc="A zip file of the FastQC quality report.",
        )
        self.output(
            "variants",
            source=self.sort_combined.out,
            output_folder="variants",
            output_name=self.sample_name,
            doc="Merged variants from the GATK caller",
        )
        self.output(
            "variants_split",
            source=self.vc_gatk.out,
            output_folder=["variants", "byInterval"],
            doc="Unmerged variants from the GATK caller (by interval)",
        )