def inputs(self):
    """Inputs for CollectInsertSizeMetrics: the BAM to analyse plus generated
    names for the metrics text file and the histogram PDF."""
    return [
        *super(Gatk4CollectInsertSizeMetricsBase, self).inputs(),
        ToolInput(
            "bam",
            BamBai(optional=False),
            prefix="-I",
            doc="Input SAM or BAM file. Required.",
            position=10,
        ),
        # Both output names are derived from the input BAM's basename.
        ToolInput(
            "outputFilename",
            Filename(
                prefix=InputSelector("bam", remove_file_extension=True),
                extension=".txt",
                suffix=".metrics",
            ),
            prefix="-O",
            doc="File to write the output to. Required.",
        ),
        ToolInput(
            "outputHistogram",
            Filename(
                prefix=InputSelector("bam", remove_file_extension=True),
                extension=".pdf",
                suffix=".histogram",
            ),
            prefix="-H",
            doc="File to write insert size Histogram chart to. Required. ",
        ),
        *Gatk4CollectInsertSizeMetricsBase.additional_args,
    ]
def inputs(self):
    """Inputs for MarkDuplicates: a coordinate-sorted BAM in, the marked BAM
    and the duplication-metrics file out, plus inherited/additional args."""
    return [
        ToolInput(
            "bam",
            BamBai(),
            prefix="-I",
            position=10,
            secondaries_present_as={".bai": "^.bai"},
            doc="One or more input SAM or BAM files to analyze. Must be coordinate sorted.",
        ),
        # FIX: the -O and -M doc strings were swapped. Per Picard/GATK
        # MarkDuplicates, -O (OUTPUT) is the file marked records are written
        # to, and -M (METRICS_FILE) is the duplication-metrics file.
        ToolInput(
            "outputFilename",
            Filename(extension=".bam"),
            position=10,
            prefix="-O",
            doc="The output file to write marked records to.",
        ),
        ToolInput(
            "metricsFilename",
            Filename(extension=".metrics.txt"),
            position=10,
            prefix="-M",
            doc="File to write duplication metrics to",
        ),
        *super(Gatk4MarkDuplicatesBase, self).inputs(),
        *self.additional_args,
    ]
def inputs(self):
    """Inputs for CalculateContamination: a pileup table in; a tumor
    segmentation table and a contamination table out."""
    return [
        *super().inputs(),
        *Gatk4CalculateContaminationBase.additional_args,
        ToolInput(
            "pileupTable",
            File(),
            prefix="-I",
            doc="pileup table from summarize pileup",
        ),
        # FIX: doc previously said "Reference sequence file" — a copy-paste
        # error; --tumor-segmentation is the segmentation OUTPUT table.
        ToolInput(
            "segmentationFileOut",
            Filename(
                prefix=InputSelector("pileupTable", remove_file_extension=True),
                extension=".mutect2_segments",
            ),
            prefix="--tumor-segmentation",
            doc="The output table containing segmentation of the tumor by minor allele fraction",
        ),
        ToolInput(
            "contaminationFileOut",
            Filename(
                prefix=InputSelector("pileupTable", remove_file_extension=True),
                extension=".mutect2_contamination",
            ),
            position=2,
            prefix="-O",
        ),
    ]
def inputs(self):
    """Inputs for the structural-variant caller: input BAMs, a reference with
    dict, generated output/assembly filenames, and optional tuning knobs."""
    return [
        ToolInput("bams", Array(Bam()), position=10),
        ToolInput("reference", FastaWithDict(), position=1, prefix="--reference"),
        ToolInput(
            "outputFilename",
            Filename(suffix=".svs", extension=".vcf"),
            position=2,
            prefix="--output",
        ),
        ToolInput(
            "assemblyFilename",
            Filename(suffix=".assembled", extension=".bam"),
            position=3,
            prefix="--assembly",
        ),
        # Defaults to the number of CPUs selected for the job.
        ToolInput("threads", Int(optional=True), default=CpuSelector(), prefix="--threads"),
        ToolInput("blacklist", Bed(optional=True), position=4, prefix="--blacklist"),
        ToolInput("tmpdir", String(optional=True), default="./TMP", prefix="--workingdir"),
    ]
def inputs(self) -> List[ToolInput]:
    """Inputs for cutadapt: the paired FASTQ, optional adapters, and the two
    generated output filenames (R1/R2).

    FIX: removed the unused ``fastq_uuid``/``import uuid`` — the value was
    computed but never referenced.
    """
    return [
        ToolInput("fastq", FastqGzPair, position=5),
        ToolInput(
            "adapter",
            input_type=Array(String(), optional=True),
            prefix="-a",
            prefix_applies_to_all_elements=True,
            doc="Sequence of an adapter ligated to the 3' end (paired data: of the first read). "
            "The adapter and subsequent bases are trimmed. If a '$' character is appended ('anchoring'), "
            "the adapter is only found if it is a suffix of the read.",
        ),
        ToolInput(
            "outputFilename",
            Filename(suffix="-R1", extension=".fastq.gz"),
            prefix="-o",
            doc="Write trimmed reads to FILE. FASTQ or FASTA format is chosen depending on input. "
            "The summary report is sent to standard output. Use '{name}' in FILE to demultiplex "
            "reads into multiple files. Default: write to standard output",
        ),
        ToolInput(
            "secondReadFile",
            Filename(suffix="-R2", extension=".fastq.gz"),
            prefix="-p",
            doc="Write second read in a pair to FILE.",
        ),
        *self.additional_args,
    ]
def inputs(self):
    """Inputs for MarkDuplicates over multiple BAMs: output names fall back to
    'generated' when no outputPrefix is supplied."""
    # Would be good to include this in the prefix:
    # If(InputSelector("bam").length().equals(1), InputSelector("bam")[0].basename(), None)
    prefix = FirstOperator([InputSelector("outputPrefix"), "generated"])
    return [
        ToolInput(
            "bam",
            Array(Bam),
            prefix="-I",
            position=10,
            # secondaries_present_as={".bai": "^.bai"},
            doc="One or more input SAM or BAM files to analyze. Must be coordinate sorted.",
        ),
        ToolInput("outputPrefix", String(optional=True)),
        # FIX: the -O and -M doc strings were swapped. Per Picard/GATK
        # MarkDuplicates, -O (OUTPUT) is the file marked records are written
        # to, and -M (METRICS_FILE) is the duplication-metrics file.
        ToolInput(
            "outputFilename",
            Filename(prefix=prefix, suffix=".markduped", extension=".bam"),
            position=10,
            prefix="-O",
            doc="The output file to write marked records to.",
        ),
        ToolInput(
            "metricsFilename",
            Filename(prefix=prefix, suffix=".metrics", extension=".txt"),
            position=10,
            prefix="-M",
            doc="File to write duplication metrics to",
        ),
        *super().inputs(),
        *self.additional_args,
    ]
def test_input_value_filename_nostringenv(self):
    """Outside a string environment, a generated Filename must be quoted."""
    filename = Filename()
    resolved = wdl.get_input_value_from_potential_selector_or_generator(
        filename, None, string_environment=False
    )
    expected = '"%s"' % filename.generated_filename()
    self.assertEqual(expected, resolved)
def test_input_value_filename_stringenv(self):
    """Inside a string environment, a generated Filename resolves unquoted."""
    import uuid

    filename = Filename(guid=str(uuid.uuid4()))
    resolved = wdl.get_input_value_from_potential_selector_or_generator(
        filename, None, string_environment=True
    )
    self.assertEqual(filename.generated_filename(), resolved)
def inputs(self):
    """Inputs for HaplotypeCaller: indexed reads + reference in, VCF (and
    optional assembled-haplotype BAM) out; output names derive from the reads."""
    return [
        *super(Gatk4HaplotypeCallerBase, self).inputs(),
        *Gatk4HaplotypeCallerBase.optional_args,
        ToolInput(
            "inputRead",
            BamBai(),
            doc="BAM/SAM/CRAM file containing reads",
            prefix="--input",
            secondaries_present_as={".bai": "^.bai"},
        ),
        ToolInput(
            "reference",
            FastaWithDict(),
            position=5,
            prefix="--reference",
            doc="Reference sequence file",
        ),
        ToolInput(
            "outputFilename",
            Filename(
                prefix=InputSelector("inputRead", remove_file_extension=True),
                extension=".vcf.gz",
            ),
            position=8,
            prefix="--output",
            doc="File to which variants should be written",
        ),
        ToolInput(
            "dbsnp",
            VcfTabix(optional=True),
            position=7,
            prefix="--dbsnp",
            doc="(Also: -D) A dbSNP VCF file.",
        ),
        ToolInput(
            "intervals",
            Bed(optional=True),
            prefix="--intervals",
            doc="-L (BASE) One or more genomic intervals over which to operate",
        ),
        ToolInput(
            "outputBamName",
            Filename(
                prefix=InputSelector("inputRead", remove_file_extension=True),
                extension=".bam",
            ),
            position=8,
            prefix="-bamout",
            doc="File to which assembled haplotypes should be written",
        ),
    ]
def inputs(self) -> List[ToolInput]:
    """Inputs for the VCF combiner: candidate VCFs (priority = input order),
    a regions TSV, and germline/somatic sample metadata.

    FIX: corrected the typo "seperated" -> "separated" in the `columns` doc.
    """
    return [
        ToolInput(
            "outputFilename",
            Filename(extension=".vcf", suffix=".combined"),
            prefix="-o",
        ),
        ToolInput(
            "regions",
            Filename(extension=".tsv"),
            prefix="--regions",
            doc="Region file containing all the variants, used as samtools mpileup",
        ),
        ToolInput(
            "vcfs",
            Array(Vcf()),
            prefix="-i",
            prefix_applies_to_all_elements=True,
            doc="input vcfs, the priority of the vcfs will be based on the order of the input",
        ),
        ToolInput("type", String(), prefix="--type", doc="germline | somatic"),
        ToolInput(
            "columns",
            Array(String(), optional=True),
            prefix="--columns",
            prefix_applies_to_all_elements=True,
            doc="Columns to keep, separated by space output vcf (unsorted)",
        ),
        ToolInput(
            "normal",
            String(optional=True),
            prefix="--normal",
            doc="Sample id of germline vcf, or normal sample id of somatic vcf",
        ),
        ToolInput(
            "tumor",
            String(optional=True),
            prefix="--tumor",
            doc="tumor sample ID, required if inputs are somatic vcfs",
        ),
        ToolInput(
            "priority",
            Int(optional=True),
            prefix="--priority",
            doc="The priority of the callers, must match with the callers in the source header",
        ),
    ]
def inputs(self):
    """Inputs for GenotypeConcordance: call and truth VCFs in, three metrics
    files (sharing one basename) out."""
    return [
        *super().inputs(),
        ToolInput(
            "callVCF",
            VcfTabix(),
            prefix="--CALL_VCF",
            doc="The VCF containing the call sample",
        ),
        ToolInput(
            "truthVCF",
            VcfIdx(),
            prefix="--TRUTH_VCF",
            doc="The VCF containing the truth sample",
        ),
        ToolInput(
            "outputBasename",
            Filename(),
            prefix="--OUTPUT",
            doc="Basename for the three metrics files that are to be written. Resulting files will be:"
            "(1) .genotype_concordance_summary_metrics, "
            "(2) .genotype_concordance_detail_metrics, "
            "(3) .genotype_concordance_contingency_metrics.",
        ),
        # *super(Gatk4GenotypeConcordanceBase, self).inputs(),
        *self.additional_args,
    ]
def inputs(self) -> List[ToolInput]:
    """Inputs for VarDict (germline). The output is produced via a shell
    redirection (`>`), so shell quoting is disabled throughout."""
    return [
        ToolInput("intervals", Bed(), position=2, shell_quote=False),
        ToolInput(
            "outputFilename",
            Filename(extension=".vcf", suffix=".vardict"),
            prefix=">",
            position=6,
            shell_quote=False,
        ),
        ToolInput(
            "bam",
            BamBai(),
            prefix="-b",
            position=1,
            shell_quote=False,
            doc="The indexed BAM file",
        ),
        ToolInput(
            "reference",
            FastaFai(),
            prefix="-G",
            position=1,
            shell_quote=False,
            doc="The reference fasta. Should be indexed (.fai). "
            "Defaults to: /ngs/reference_data/genomes/Hsapiens/hg19/seq/hg19.fa",
        ),
        *VarDictGermlineBase.vardict_inputs,
        *VarDictGermlineBase.var2vcf_inputs,
    ]
def inputs(self):
    """Inputs for ApplyBQSR: indexed reads, reference, optional recalibration
    table and intervals; recalibrated BAM out."""
    return [
        *super(Gatk4ApplyBqsrBase, self).inputs(),
        ToolInput(
            "bam",
            BamBai(),
            prefix="-I",
            doc="The SAM/BAM/CRAM file containing reads.",
            secondaries_present_as={".bai": "^.bai"},
            position=10,
        ),
        ToolInput(
            "reference", FastaWithDict(), prefix="-R", doc="Reference sequence"
        ),
        ToolInput(
            "outputFilename",
            Filename(extension=".bam"),
            prefix="-O",
            doc="Write output to this file",
        ),
        ToolInput(
            "recalFile",
            Tsv(optional=True),
            prefix="--bqsr-recal-file",
            doc="Input recalibration table for BQSR",
        ),
        ToolInput(
            "intervals",
            Bed(optional=True),
            prefix="--intervals",
            doc="-L (BASE) One or more genomic intervals over which to operate",
        ),
        *self.additional_args,
    ]
def inputs(self):
    """Inputs for `kallisto index`: optional k-mer size, generated index
    filename, and the reference transcriptome (localised into the job dir)."""
    return [
        ToolInput(
            "kmer_size",
            Int(optional=True),
            prefix="-k",
            position=1,
            doc="k-mer (odd) length (default: 31, max value: 31)",
        ),
        ToolInput(
            "index",
            Filename(extension=".kidx"),
            prefix="-i",
            position=2,
            doc="Filename for the kallisto index to be constructed",
        ),
        ToolInput(
            "reference",
            Fasta,
            position=3,
            localise_file=True,
            doc="Filename for a reference transcriptome",
        ),
        # --make-unique    Replace repeated target names with unique names
    ]
def inputs(self):
    """Inputs for the performance-summary script: flagstat, insert-size
    metrics and coverage reports in, one summary CSV out."""
    return [
        ToolInput(
            "flagstat",
            File(),
            prefix="--flagstat",
            doc="output of samtools flagstat on bam",
        ),
        ToolInput(
            "collectInsertSizeMetrics",
            File,
            prefix="--collect_insert_metrics",
            doc="output of CollectInsertMetrics (GATK or Picard) on bam",
        ),
        ToolInput(
            "coverage",
            File(),
            prefix="--coverage",
            doc="output of bedtools coverageBed for targeted bam; bedtools genomeCoverageBed for whole genome bam",
        ),
        ToolInput(
            "outputPrefix",
            Filename(extension=".csv"),
            prefix="-o",
            doc="prefix of output summary csv",
        ),
        *self.additional_args,
    ]
def inputs(self):
    """Inputs for SortSam: BAM in, sorted BAM out (named after the input),
    plus the mandatory sort order."""
    return [
        *super(Gatk4SortSamBase, self).inputs(),
        ToolInput(
            "bam",
            Bam(),
            prefix="-I",
            doc="The SAM/BAM/CRAM file to sort.",
            position=10,
        ),
        ToolInput(
            "outputFilename",
            Filename(
                prefix=InputSelector("bam", remove_file_extension=True),
                suffix=".sorted",
                extension=".bam",
            ),
            position=10,
            prefix="-O",
            doc="The sorted SAM/BAM/CRAM output file.",
        ),
        ToolInput(
            "sortOrder",
            String(),
            prefix="-SO",
            position=10,
            doc="The --SORT_ORDER argument is an enumerated type (SortOrder), which can have one of "
            "the following values: [unsorted, queryname, coordinate, duplicate, unknown]",
        ),
        *Gatk4SortSamBase.additional_args,
    ]
def inputs(self):
    """Inputs for GetPileupSummaries: indexed BAMs and a sites VCF in, a
    pileup table out."""
    return [
        *super().inputs(),
        *Gatk4GetPileUpSummariesBase.additional_args,
        ToolInput(
            "bam",
            Array(BamBai()),
            prefix="-I",
            prefix_applies_to_all_elements=True,
            doc="The SAM/BAM/CRAM file containing reads.",
            position=0,
        ),
        ToolInput(
            "sites",
            VcfTabix(),
            prefix="-V",
            doc="sites of common biallelic variants",
        ),
        ToolInput(
            "intervals",
            VcfTabix(optional=True),
            prefix="--intervals",
            doc="-L (BASE) One or more genomic intervals over which to operate",
        ),
        ToolInput("pileupTableOut", Filename(extension=".txt"), position=1, prefix="-O"),
    ]
def test_resolve_filename_in_inpselect(self):
    """An InputSelector over a Filename input resolves to a select_first
    with the generated name as fallback inside a StringFormatter."""
    generated = Filename(extension=".ext")
    tool_inputs = {"ti": ToolInput("ti", generated)}
    formatter = StringFormatter("fn: {place}", place=InputSelector("ti"))
    resolved = wdl.get_input_value_from_potential_selector_or_generator(
        formatter, tool_inputs
    )
    expected = f'fn: ~{{select_first([ti, "{generated.generated_filename()}"])}}'
    self.assertEqual(expected, resolved)
def inputs(self):
    """Inputs for LearnReadOrientationModel: f1r2 count tarballs in, the
    learned model tarball out."""
    return [
        *super(Gatk4LearnReadOrientationModelBase, self).inputs(),
        *Gatk4LearnReadOrientationModelBase.additional_args,
        ToolInput(
            "f1r2CountsFiles",
            Array(TarFileGz),
            position=0,
            prefix="-I",
            prefix_applies_to_all_elements=True,
            doc="Counts for the read orientation of fragments",
        ),
        ToolInput(
            "numEmIterations",
            Int(optional=True),
            position=1,
            prefix="--num-em-iterations",
            default=30,  # Sebastian thinks this is best
            doc="Amount of iterations for the em process before it bails",
        ),
        ToolInput("modelFileOut", Filename(extension=".tar.gz"), position=3, prefix="-O"),
    ]
def inputs(self) -> List[ToolInput]:
    """Inputs for vt normalize: exactly one of the three (mutually exclusive,
    all optional) VCF flavours, a reference, and a generated output name.
    Shell quoting is disabled because this runs inside a shell pipeline."""
    return [
        ToolInput("vcf", Vcf(optional=True), position=1, shell_quote=False),
        ToolInput(
            "compressedTabixVcf",
            VcfTabix(optional=True),
            position=1,
            shell_quote=False,
        ),
        ToolInput(
            "compressedVcf",
            CompressedVcf(optional=True),
            position=1,
            shell_quote=False,
        ),
        ToolInput(
            "reference",
            FastaWithDict(),
            prefix="-r",
            position=4,
            shell_quote=False,
        ),
        ToolInput(
            "outputFilename",
            Filename(extension=".vcf", suffix=".norm"),
            position=6,
            prefix="-o",
            shell_quote=False,
        ),
    ]
def inputs(self):
    """Inputs for merging sorted BAMs: the BAMs in, one merged BAM out,
    named after the (optional) sampleName."""
    # NOTE(review): sampleName is optional but is used directly as the
    # Filename prefix; sibling tools wrap this in
    # FirstOperator([..., "generated"]) — confirm Filename tolerates a null
    # prefix here.
    return [
        *super().inputs(),
        ToolInput(
            "bams",
            Array(BamBai()),
            prefix="-I",
            prefix_applies_to_all_elements=True,
            doc="The SAM/BAM file to sort.",
            position=10,
        ),
        ToolInput(
            "sampleName",
            String(optional=True),
            doc="Used for naming purposes only",
        ),
        ToolInput(
            "outputFilename",
            Filename(
                prefix=InputSelector("sampleName"),
                suffix=".merged",
                extension=".bam",
            ),
            position=10,
            prefix="-O",
            doc="SAM/BAM file to write merged result to",
        ),
        *self.additional_args,
    ]
def inputs(self):
    """Inputs for MergeBamAlignment: one unmapped BAM plus aligned BAM(s),
    an optional reference, and a merged output BAM."""
    return [
        # NOTE(review): ubam is a single BamBai yet sets
        # prefix_applies_to_all_elements=True — possibly a leftover from an
        # Array type; confirm against the translator's handling.
        ToolInput(
            "ubam",
            BamBai(),
            prefix="--UNMAPPED_BAM",
            prefix_applies_to_all_elements=True,
            doc="Original SAM or BAM file of unmapped reads, which must be in queryname order.",
            position=10,
        ),
        ToolInput(
            "bam",
            Array(BamBai()),
            prefix="--ALIGNED_BAM",
            prefix_applies_to_all_elements=True,
            doc="SAM or BAM file(s) with alignment data.",
            position=10,
        ),
        ToolInput(
            "reference",
            FastaWithDict(optional=True),
            prefix="--REFERENCE_SEQUENCE",
            position=10,
            doc="Reference sequence file.",
        ),
        ToolInput(
            "outputFilename",
            Filename(extension=".bam"),
            position=10,
            prefix="--OUTPUT",
            doc="Merged SAM or BAM file to write to.",
        ),
        *self.additional_args,
    ]
def inputs(self):
    """Inputs for bcftools sort: a (possibly compressed) VCF in, a compressed
    sorted VCF out (output type defaults to 'z')."""
    return [
        ToolInput(
            "vcf",
            UnionType(Vcf, CompressedVcf),
            position=1,
            doc="The VCF file to sort",
        ),
        ToolInput(
            "outputFilename",
            Filename(suffix=".sorted", extension=".vcf.gz"),
            prefix="--output-file",
            doc="(-o) output file name [stdout]",
        ),
        ToolInput(
            "outputType",
            String(optional=True),
            prefix="--output-type",
            default="z",
            doc="(-O) b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]",
        ),
        ToolInput(
            "tempDir",
            String(optional=True),
            prefix="--temp-dir",
            doc="(-T) temporary files [/tmp/bcftools-sort.XXXXXX/]",
        ),
    ]
def inputs(self):
    """Inputs for featureCounts: BAM(s) and an annotation file in, a
    tab-delimited counts file (plus '<name>.summary') out."""
    return [
        *self.additional_inputs,
        ToolInput(
            "bam",
            Array(Bam),
            position=10,
            doc="A list of SAM or BAM format files. They can be either name or location sorted. If no files provided, <stdin> input is expected. Location-sorted paired-end reads are automatically sorted by read names.",
        ),
        ToolInput(
            "outputFilename",
            Filename(extension=".txt"),
            prefix="-o",
            doc="Name of output file including read counts. A separate file including summary statistics of counting results is also included in the output ('<string>.summary'). Both files are in tab delimited format.",
        ),
        ToolInput(
            "annotationFile",
            File,
            prefix="-a",
            doc="Name of an annotation file. GTF/GFF format by default. See -F option for more format information. Inbuilt annotations (SAF format) is available in 'annotation' directory of the package. Gzipped file is also accepted.",
        ),
    ]
def inputs(self):
    """Inputs for samtools view: SAM/BAM/CRAM in, BAM out (named after the
    input), with optional reference and region restriction."""
    return [
        *super(SamToolsViewBase, self).inputs(),
        *SamToolsViewBase.additional_inputs,
        ToolInput("sam", UnionType(Sam(), Bam(), Cram()), position=10),
        ToolInput(
            "reference",
            FastaWithDict(optional=True),
            position=6,
            prefix="-T",
            doc="A FASTA format reference FILE, optionally compressed by bgzip and ideally indexed "
            "by samtools faidx. If an index is not present, one will be generated for you.",
        ),
        ToolInput(
            "outputFilename",
            Filename(
                prefix=InputSelector("sam", remove_file_extension=True),
                extension=".bam",
            ),
            position=5,
            prefix="-o",
            doc="Output to FILE [stdout].",
        ),
        # Regions come AFTER the input filename on the samtools command line.
        ToolInput(
            "regions",
            Array(String, optional=True),
            position=11,
            doc="Region specifications after the input filename to restrict output to only those alignments which "
            "overlap the specified region(s). Use of region specifications requires a coordinate-sorted and "
            "indexed input file (in BAM or CRAM format)",
        ),
    ]
def inputs(self) -> List[ToolInput]:
    """Inputs for the VCF annotate/fill tool: a VCF in, an annotated VCF out
    (named after the input), plus the column/fasta configuration."""
    return [
        ToolInput(
            "vcf",
            UnionType(Vcf, CompressedVcf),
            position=1,
            doc="Input vcf",
        ),
        ToolInput(
            "outputFilename",
            Filename(
                InputSelector("vcf", remove_file_extension=True),
                suffix=".fill",
                extension=".vcf",
            ),
            position=6,
            doc="Output vcf",
        ),
        ToolInput(
            "column",
            String(),
            prefix="--column",
            position=3,
            doc="REF or INFO tag, e.g. AA for ancestral allele",
        ),
        ToolInput("fasta", Fasta(), prefix="--fasta", position=3, doc="fasta file"),
        ToolInput(
            "header_lines",
            File(optional=True),
            prefix="--header-lines",
            position=3,
            doc="optional file containing header lines to append",
        ),
        ToolInput(
            "include",
            String(optional=True),
            prefix="--include",
            position=3,
            doc="annotate only records passing filter expression",
        ),
        ToolInput(
            "exclude",
            String(optional=True),
            prefix="--exclude",
            position=3,
            doc="annotate only records failing filter expression",
        ),
        ToolInput(
            "replace_non_ACGTN",
            Boolean(optional=True),
            prefix="--replace-non-ACGTN",
            position=3,
            doc="replace non-ACGTN characters with N",
        ),
    ]
def inputs(self):
    """Inputs for GetPileupSummaries (CRAM-capable variant): indexed reads and
    a sites VCF in, a pileup table out; the table name falls back to
    'generated' when no sampleName is supplied."""
    return [
        *super().inputs(),
        *Gatk4GetPileUpSummariesBase.additional_args,
        ToolInput(
            "bam",
            Array(BamBai()),
            prefix="-I",
            prefix_applies_to_all_elements=True,
            doc="The SAM/BAM/CRAM file containing reads.",
            position=0,
        ),
        ToolInput(
            "sampleName", String(optional=True), doc="Used for naming purposes"
        ),
        ToolInput(
            "sites",
            VcfTabix(),
            prefix="-V",
            doc="sites of common biallelic variants",
        ),
        ToolInput(
            "intervals",
            Bed(optional=True),
            prefix="--intervals",
            doc="-L (BASE) One or more genomic intervals over which to operate",
        ),
        ToolInput(
            "pileupTableOut",
            Filename(
                # Name components joined with "."; nulls are filtered first.
                prefix=JoinOperator(
                    FilterNullOperator(
                        [
                            FirstOperator(
                                [InputSelector("sampleName"), "generated"]
                            ),
                            # If(
                            #     IsDefined(InputSelector("intervals")),
                            #     InputSelector(
                            #         "intervals", remove_file_extension=True
                            #     ),
                            #     "",
                            # ),
                        ]
                    ),
                    ".",
                ),
                extension=".txt",
            ),
            position=1,
            prefix="-O",
        ),
        ToolInput(
            "reference",
            FastaWithDict(optional=True),
            prefix="-R",
            doc="reference to use when decoding CRAMS",
        ),
    ]
def inputs(self):
    """Inputs for the tar tool: one or two file lists (localised so they tar
    with flat paths) and a generated archive name."""
    return [
        ToolInput("files", Array(File()), position=2, localise_file=True),
        ToolInput(
            "files2", Array(File(), optional=True), position=3, localise_file=True
        ),
        ToolInput("outputFilename", Filename(extension=".tar"), position=1),
    ]
def inputs(self):
    """Inputs for the gene/region coverage-summary script: either a list file
    or a (sampleName, bedtoolsOutputPath) pair in; gene and region summary
    files out.

    FIX: corrected the typos "commna sepparated" -> "comma separated" in the
    `fold` doc.
    """
    return [
        ToolInput(
            "listFile",
            File(optional=True),
            prefix="--list",
            doc="List file: A tsv file contains SampleName\tPathToBedtoolsOutput on each line",
        ),
        ToolInput(
            "sampleName",
            String(optional=True),
            prefix="--name",
            doc="Sample name if list not used",
        ),
        ToolInput(
            "bedtoolsOutputPath",
            File(optional=True),
            prefix="--path",
            doc="Path to bedtools output if list not used",
        ),
        ToolInput(
            "outputGeneFile",
            Filename(extension=".txt", suffix=".gene"),
            prefix="--gene",
            doc="Output gene file",
        ),
        ToolInput(
            "outputRegionFile",
            Filename(extension=".txt", suffix=".region"),
            prefix="--region",
            doc="Output region file",
        ),
        ToolInput(
            "fold",
            String(optional=True),
            prefix="--fold",
            doc="Folds, quoted and comma separated, default 1,10,20,100",
        ),
        ToolInput(
            "threads",
            Int(optional=True),
            prefix="--threads",
            doc="number of threads, default:32",
        ),
    ]
def inputs(self):
    """Inputs for bwa mem: indexed reference, reads (plus optional mates),
    and a generated SAM output name."""
    return [
        ToolInput("reference", FastaBwa(), position=9),
        ToolInput("reads", Array(FastqGz()), position=10, doc=None),
        ToolInput("mates", FastqGzPair(optional=True), position=11, doc=None),
        ToolInput("outputFilename", Filename(extension=".sam")),
        *BwaMemBase.additional_inputs,
    ]