def inputs(self):
    """Inputs for the QC-summary tool: flagstat, insert-size metrics and
    coverage reports, plus a generated csv output prefix."""
    return [
        ToolInput(
            "flagstat",
            File(),
            prefix="--flagstat",
            doc="output of samtools flagstat on bam",
        ),
        ToolInput(
            "collectInsertSizeMetrics",
            # was the bare class `File`; instantiate for consistency with the
            # other inputs in this list
            File(),
            prefix="--collect_insert_metrics",
            doc="output of CollectInsertMetrics (GATK or Picard) on bam",
        ),
        ToolInput(
            "coverage",
            File(),
            prefix="--coverage",
            doc="output of bedtools coverageBed for targeted bam; bedtools genomeCoverageBed for whole genome bam",
        ),
        ToolInput(
            "outputPrefix",
            Filename(extension=".csv"),
            prefix="-o",
            doc="prefix of output summary csv",
        ),
        *self.additional_args,
    ]
def outputs(self):
    # Everything Strelka leaves under the run directory: the generated
    # workflow script/config, run statistics, and the somatic call sets.
    collected = []
    collected.append(
        ToolOutput(
            "configPickle",
            File(),
            glob=InputSelector("rundir") + "/runWorkflow.py.config.pickle",
        )
    )
    collected.append(
        ToolOutput("script", File(), glob=InputSelector("rundir") + "/runWorkflow.py")
    )
    collected.append(
        ToolOutput(
            "stats",
            Tsv(),
            glob=InputSelector("rundir") + "/results/stats/runStats.tsv",
            doc="A tab-delimited report of various internal statistics from the variant calling process: "
            "Runtime information accumulated for each genome segment, excluding auxiliary steps such "
            "as BAM indexing and vcf merging. Indel candidacy statistics",
        )
    )
    collected.append(
        ToolOutput(
            "indels",
            VcfTabix(),
            glob=InputSelector("rundir") + "/results/variants/somatic.indels.vcf.gz",
            doc="",
        )
    )
    collected.append(
        ToolOutput(
            "snvs",
            VcfTabix(),
            glob=InputSelector("rundir") + "/results/variants/somatic.snvs.vcf.gz",
            doc="",
        )
    )
    return collected
def inputs(self):
    """Inputs for the bedtools coverage tool: one of bam/bed/gff-vcf as the
    data source, plus an optional genome (chrom sizes) file."""
    return [
        *self.additional_inputs,
        ToolInput(
            "inputBam",
            Bam(optional=True),
            prefix="-ibam",
            doc="Input bam file. Note: BAM _must_ be sorted by position. A 'samtools sort <BAM>' should suffice.",
        ),
        ToolInput(
            "inputBed",
            File(optional=True),
            prefix="-iBed",
            doc="Input bed file. Must be grouped by chromosome. A simple 'sort -k 1,1 <BED> > <BED>.sorted' will suffice.",
        ),
        ToolInput(
            "inputFile",
            File(optional=True),
            prefix="-i",
            doc="Input file, can be gff/vcf.",
        ),
        ToolInput(
            "genome",
            File(optional=True),
            prefix="-g",
            # typo fix: "should tab delimited" -> "should be tab delimited"
            doc="Genome file. The genome file should be tab delimited and structured as follows: <chromName><TAB><chromSize>.",
        ),
    ]
def inputs(self):
    # Required sequence inputs first, then optional interval restrictions,
    # then whatever extra arguments the concrete tool declares.
    core = [
        ToolInput(
            "bam",
            BamBai(),
            prefix="-I",
            doc="Input file containing sequence data (BAM or CRAM)",
            secondaries_present_as={".bai": "^.bai"},
            position=10,
        ),
        ToolInput(
            "reference", FastaWithDict(), prefix="-R", doc="Reference sequence file"
        ),
        ToolInput(
            "outputPrefix",
            String(),
            prefix="-o",
            doc="An output file created by the walker. Will overwrite contents if file exists",
        ),
    ]
    interval_opts = [
        ToolInput(
            "intervals",
            File(optional=True),
            prefix="-L",
            doc="One or more genomic intervals over which to operate",
        ),
        ToolInput(
            "excludeIntervals",
            File(optional=True),
            prefix="--excludeIntervals",
            doc="One or more genomic intervals to exclude from processing",
        ),
    ]
    return core + interval_opts + list(self.additional_args)
def outputs(self):
    # The three metrics files share a naming scheme, so generate one
    # ToolOutput per kind; select_first picks the single wildcard match.
    metric_kinds = ["summary", "detail", "contingency"]
    return [
        ToolOutput(
            "%sMetrics" % kind,
            File(),
            glob=WildcardSelector(
                "*.genotype_concordance_%s_metrics" % kind, select_first=True
            ),
        )
        for kind in metric_kinds
    ]
    # ToolOutput("vcf", VcfIdx(optional=True), glob=WildcardSelector("*.vcf"))
def inputs(self):
    # The tar name comes first on the command line (position=1); both file
    # arrays are localised into the working directory before archiving.
    primary = ToolInput("files", Array(File()), position=2, localise_file=True)
    secondary = ToolInput(
        "files2", Array(File(), optional=True), position=3, localise_file=True
    )
    archive_name = ToolInput("outputFilename", Filename(extension=".tar"), position=1)
    return [primary, secondary, archive_name]
def outputs(self) -> List[ToolOutput]:
    """Manta outputs under the run directory, declared table-style as
    (name, type, path-under-runDir); order matches the original list."""
    spec = [
        ("python", File(), "/runWorkflow.py"),
        ("pickle", File(), "/runWorkflow.py.config.pickle"),
        ("candidateSV", VcfTabix(), "/results/variants/candidateSV.vcf.gz"),
        ("candidateSmallIndels", VcfTabix(), "/results/variants/candidateSmallIndels.vcf.gz"),
        ("diploidSV", VcfTabix(), "/results/variants/diploidSV.vcf.gz"),
        ("alignmentStatsSummary", File(), "/results/stats/alignmentStatsSummary.txt"),
        ("svCandidateGenerationStats", Tsv(), "/results/stats/svCandidateGenerationStats.tsv"),
        ("svLocusGraphStats", Tsv(), "/results/stats/svLocusGraphStats.tsv"),
        # optional outputs
        ("somaticSV", VcfTabix(optional=True), "/results/variants/somaticSV.vcf.gz"),
        ("tumorSV", VcfTabix(optional=True), "/results/variants/tumorSV.vcf.gz"),
    ]
    return [
        ToolOutput(name, dtype, glob=InputSelector("runDir") + path)
        for name, dtype, path in spec
    ]
def constructor(self):
    """Germline workflow: run Manta and StrelkaGermline on one bam, then
    split/normalise the Strelka variants and keep only PASS records."""
    self.input("bam", BamBai)
    self.input("reference", FastaWithDict)
    # optional
    self.input("intervals", BedTabix(optional=True))
    self.input("is_exome", Boolean(optional=True))
    self.input("manta_config", File(optional=True))
    self.input("strelka_config", File(optional=True))
    self.step(
        "manta",
        Manta_1_5_0(
            bam=self.bam,
            reference=self.reference,
            callRegions=self.intervals,
            exome=self.is_exome,
            config=self.manta_config,
        ),
    )
    self.step(
        "strelka",
        StrelkaGermline_2_9_10(
            bam=self.bam,
            reference=self.reference,
            callRegions=self.intervals,
            exome=self.is_exome,
            config=self.strelka_config,
        ),
    )
    # normalise and filter "PASS" variants
    self.step(
        "splitnormalisevcf",
        SplitMultiAllele(
            vcf=self.strelka.variants.as_type(CompressedVcf),
            reference=self.reference,
        ),
    )
    self.step(
        "filterpass",
        # NOTE(review): "removeFileteredAll" is the parameter name as spelled
        # by the VcfTools wrapper — do not "correct" it here.
        VcfToolsvcftoolsLatest(
            vcf=self.splitnormalisevcf.out,
            removeFileteredAll=True,
            recode=True,
            recodeINFOAll=True,
        ),
    )
    self.output("sv", source=self.manta.diploidSV)
    self.output("variants", source=self.strelka.variants)
    self.output("out", source=self.filterpass.out)
def inputs(self):
    """Inputs for `cat`: the file/files to print plus its boolean flags,
    declared table-style as (name, prefix, doc)."""
    flag_spec = [
        ("number_output", "-n", "Number the output lines, starting at 1."),
        ("number_non_blank", "-b", "Number the non-blank output lines, starting at 1."),
        ("disable_output_buffer", "-u", "Disable output buffering."),
        (
            "squeeze",
            "-s",
            "Squeeze multiple adjacent empty lines, causing the output to be single spaced.",
        ),
        (
            "display_nonprint_and_eol_chars",
            "-e",
            "Display non-printing characters (see the -v option), and display "
            "a dollar sign (`$') at the end of each line.",
        ),
        (
            "display_nonprint_and_tab_chars",
            "-t",
            "Display non-printing characters (see the -v option), and display tab characters as `^I'.",
        ),
        (
            "display_nonprint_chars",
            "-v",
            "Display non-printing characters so they are visible. Control characters print as `^X' for "
            "control-X; the delete character (octal 0177) prints as `^?'. Non-ASCII characters (with the"
            " high bit set) are printed as `M-' (for meta) followed by the character for the low 7 bits.",
        ),
    ]
    return [
        ToolInput("file", File(optional=True)),
        ToolInput("files", Array(File(), optional=True), position=1),
    ] + [
        ToolInput(name, Boolean(optional=True), prefix=flag, doc=doc)
        for name, flag, doc in flag_spec
    ]
def outputs(self):
    # All outputs share the outputPrefix stem and differ only by suffix;
    # the last three are only produced in some runs, hence optional.
    required_suffixes = [
        ("out_summary", ".txt"),
        ("out_purity_png", "_purity.png"),
        ("out_purity_seg", "_purity.seg"),
        ("out_purity_rds", "_purity.rds"),
        ("out_hisens_png", "_hisens.png"),
        ("out_hisens_seg", "_hisens.seg"),
        ("out_hisens_rds", "_hisens.rds"),
    ]
    optional_suffixes = [
        ("out_arm_level", ".arm_level.txt"),
        ("out_gene_level", ".gene_level.txt"),
        ("out_qc", ".qc.txt"),
    ]
    return [
        ToolOutput(name, File(), glob=InputSelector("outputPrefix") + suffix)
        for name, suffix in required_suffixes
    ] + [
        ToolOutput(name, File(optional=True), glob=InputSelector("outputPrefix") + suffix)
        for name, suffix in optional_suffixes
    ]
def outputs(self) -> List[ToolOutput]:
    # Main vcf plus the process stdout; the stats/warnings reports only
    # exist when the corresponding filenames were supplied.
    main_vcf = ToolOutput(
        "out", VcfTabix(optional=True), selector=InputSelector("outputFilename")
    )
    console = ToolOutput("out_stdout", Stdout)
    reports = [
        ToolOutput(
            "out_stats",
            File(optional=True, extension=".html"),
            selector=InputSelector("statsFile"),
        ),
        ToolOutput(
            "out_warnings",
            File(optional=True, extension=".txt"),
            selector=InputSelector("warningFile"),
        ),
    ]
    return [main_vcf, console] + reports
def outputs(self):
    # fastq.gz files one directory down, plus the full contents of the
    # Stats/ and InterOp/ folders.
    produced = []
    produced.append(
        ToolOutput(
            "unalignedReads",
            output_type=Array(FastqGz()),
            glob=WildcardSelector("*/*.fastq.gz"),
        )
    )
    produced.append(
        ToolOutput("stats", output_type=Array(File()), glob=WildcardSelector("Stats/*"))
    )
    produced.append(
        ToolOutput(
            "interop", output_type=Array(File()), glob=WildcardSelector("InterOp/*")
        )
    )
    return produced
def outputs(self) -> List[ToolOutput]:
    # The called vcf is mandatory; the Pisces log json and strand-bias
    # report may or may not be written.
    vcf_out = ToolOutput("vcf", Vcf(), glob=WildcardSelector("*.vcf"))
    side_files = [
        ToolOutput(
            "used_options",
            File(optional=True),
            glob=WildcardSelector("PiscesLogs/*.json"),
        ),
        ToolOutput(
            "strandmetrics",
            File(optional=True),
            glob=WildcardSelector("*ReadStrandBias.txt"),
        ),
    ]
    return [vcf_out] + side_files
def outputs(self):
    # Contamination table and the matching baf-based segmentation file,
    # both located via the corresponding input filename.
    tables = []
    tables.append(
        ToolOutput(
            "contOut",
            File(),
            glob=InputSelector("contaminationFileOut"),
            doc="contamination Table",
        )
    )
    tables.append(
        ToolOutput(
            "segOut",
            File(),
            glob=InputSelector("segmentationFileOut"),
            doc="segmentation based on baf",
        )
    )
    return tables
def outputs(self) -> List[ToolOutput]:
    # The produced bam (matched by wildcard) plus the optional Hygea
    # options-log json.
    bam_out = ToolOutput("out", Bam(), glob=WildcardSelector("*"))
    log_out = ToolOutput(
        "used_options", File(optional=True), glob=WildcardSelector("HygeaLogs/*.json")
    )
    return [bam_out, log_out]
def inputs(self):
    # Both generated output names are derived from the pileup table's
    # basename (extension stripped) with a mutect2-specific extension.
    pileup = ToolInput(
        "pileupTable",
        File(),
        prefix="-I",
        doc="pileup table from summarize pileup",
    )
    segmentation = ToolInput(
        "segmentationFileOut",
        Filename(
            prefix=InputSelector("pileupTable", remove_file_extension=True),
            extension=".mutect2_segments",
        ),
        prefix="--tumor-segmentation",
        doc="Reference sequence file",
    )
    contamination = ToolInput(
        "contaminationFileOut",
        Filename(
            prefix=InputSelector("pileupTable", remove_file_extension=True),
            extension=".mutect2_contamination",
        ),
        position=2,
        prefix="-O",
    )
    return [
        *super().inputs(),
        *Gatk4CalculateContaminationBase.additional_args,
        pileup,
        segmentation,
        contamination,
    ]
def inputs(self) -> List[ToolInput]:
    # Positional vcf first, then the generated output name, then the
    # remaining flags (all at position 3).
    entries = []
    entries.append(
        ToolInput(
            "vcf",
            UnionType(Vcf, CompressedVcf),
            position=1,
            doc="Input vcf",
        )
    )
    entries.append(
        ToolInput(
            "outputFilename",
            Filename(
                InputSelector("vcf", remove_file_extension=True),
                suffix=".fill",
                extension=".vcf",
            ),
            position=6,
            doc="Output vcf",
        )
    )
    entries.append(
        ToolInput(
            "column",
            String(),
            prefix="--column",
            position=3,
            doc="REF or INFO tag, e.g. AA for ancestral allele",
        )
    )
    entries.append(
        ToolInput("fasta", Fasta(), prefix="--fasta", position=3, doc="fasta file")
    )
    entries.append(
        ToolInput(
            "header_lines",
            File(optional=True),
            prefix="--header-lines",
            position=3,
            doc="optional file containing header lines to append",
        )
    )
    entries.append(
        ToolInput(
            "include",
            String(optional=True),
            prefix="--include",
            position=3,
            doc="annotate only records passing filter expression",
        )
    )
    entries.append(
        ToolInput(
            "exclude",
            String(optional=True),
            prefix="--exclude",
            position=3,
            doc="annotate only records failing filter expression",
        )
    )
    entries.append(
        ToolInput(
            "replace_non_ACGTN",
            Boolean(optional=True),
            prefix="--replace-non-ACGTN",
            position=3,
            doc="replace non-ACGTN characters with N",
        )
    )
    return entries
def inputs(self):
    """Inputs for summarising bedtools coverage output into per-gene and
    per-region files, either from a sample list or a single sample."""
    return [
        ToolInput(
            "listFile",
            File(optional=True),
            prefix="--list",
            doc="List file: A tsv file contains SampleName\tPathToBedtoolsOutput on each line",
        ),
        ToolInput(
            "sampleName",
            String(optional=True),
            prefix="--name",
            doc="Sample name if list not used",
        ),
        ToolInput(
            "bedtoolsOutputPath",
            File(optional=True),
            prefix="--path",
            doc="Path to bedtools output if list not used",
        ),
        ToolInput(
            "outputGeneFile",
            Filename(extension=".txt", suffix=".gene"),
            prefix="--gene",
            doc="Output gene file",
        ),
        ToolInput(
            "outputRegionFile",
            Filename(extension=".txt", suffix=".region"),
            prefix="--region",
            doc="Output region file",
        ),
        ToolInput(
            "fold",
            String(optional=True),
            prefix="--fold",
            # typo fix: "commna sepparated" -> "comma separated"
            doc="Folds, quoted and comma separated, default 1,10,20,100",
        ),
        ToolInput(
            "threads",
            Int(optional=True),
            prefix="--threads",
            doc="number of threads, default:32",
        ),
    ]
def outputs(self) -> List[ToolOutput]:
    """Process stdout, the named output file, and the html stats report."""
    return [
        ToolOutput("std", Stdout),
        # was the bare class `File`; instantiate for consistency with the
        # "stats" output below
        ToolOutput("out", File(), glob=InputSelector("outputFilename")),
        ToolOutput(
            "stats", File(extension=".html"), glob=InputSelector("statsFile")
        ),
    ]
def outputs(self):
    # Text result plus the pdf histogram, both located via their
    # corresponding input filenames.
    text_out = ToolOutput("out", TextFile(), glob=InputSelector("outputFilename"))
    histogram_out = ToolOutput(
        "outHistogram",
        File(extension=".pdf"),
        glob=InputSelector("outputHistogram"),
    )
    return [text_out, histogram_out]
def outputs(self) -> List[ToolOutput]:
    # Recalibrated vcf (may be absent) plus the optional VQR options log.
    recal_vcf = ToolOutput(
        "vcf", Vcf(optional=True), glob=WildcardSelector("*.vcf.recal")
    )
    options_log = ToolOutput(
        "used_options",
        File(optional=True),
        glob=WildcardSelector("VQRLogs/*.json"),
    )
    return [recal_vcf, options_log]
def outputs(self) -> List[ToolOutput]:
    # Workflow artefacts plus the variant/genome vcfs, all resolved
    # relative to the strelka run directory.
    found = []
    found.append(
        ToolOutput(
            "configPickle",
            File(),
            glob=InputSelector("relativeStrelkaDirectory")
            + "/runWorkflow.py.config.pickle",
        )
    )
    found.append(
        ToolOutput(
            "script",
            File(),
            glob=InputSelector("relativeStrelkaDirectory") + "/runWorkflow.py",
        )
    )
    found.append(
        ToolOutput(
            "stats",
            Tsv(),
            glob=InputSelector("relativeStrelkaDirectory")
            + "/results/stats/runStats.tsv",
            doc="A tab-delimited report of various internal statistics from the variant calling process: "
            "Runtime information accumulated for each genome segment, excluding auxiliary steps such "
            "as BAM indexing and vcf merging. Indel candidacy statistics",
        )
    )
    found.append(
        ToolOutput(
            "variants",
            VcfTabix(),
            glob=InputSelector("relativeStrelkaDirectory")
            + "/results/variants/variants.vcf.gz",
            doc="Primary variant inferences are provided as a series of VCF 4.1 files",
        )
    )
    found.append(
        ToolOutput(
            "genome",
            VcfTabix(),
            glob=InputSelector("relativeStrelkaDirectory")
            + "/results/variants/genome.vcf.gz",
        )
    )
    return found
def inputs(self) -> List[ToolInput]:
    # The compressed name defaults to the input file's basename + ".gz".
    source = ToolInput("file", File(), position=100, doc="File to bgzip compress")
    target = ToolInput(
        "outputFilename",
        Filename(
            prefix=InputSelector("file").basename(),
            extension=".gz",
        ),
        position=102,
    )
    return [source, target, *self.additional_args]
def inputs(self):
    # Plain inputs first, then the Filename inputs generated from them.
    plain = [
        ToolInput("inp", str),
        ToolInput("inpOptional", Optional[str]),
        ToolInput("fileInp", File(extension=".txt")),
        ToolInput("fileInpOptional", File(extension=".txt", optional=True)),
    ]
    generated = [
        ToolInput(
            "generatedInp",
            Filename(prefix=InputSelector("inp"), extension=""),
            position=0,
        ),
        ToolInput(
            "generatedInpOptional",
            Filename(prefix=InputSelector("inpOptional")),
            position=0,
        ),
        ToolInput(
            "generatedFileInp",
            Filename(
                prefix=InputSelector("fileInp", remove_file_extension=True),
                suffix=".transformed",
                extension=".fnp",
            ),
            position=0,
        ),
        ToolInput(
            "generatedFileInpOptional",
            Filename(
                prefix=InputSelector("fileInpOptional", remove_file_extension=True),
                suffix=".optional",
                extension=".txt",
            ),
            position=0,
        ),
    ]
    return plain + generated
def constructor(self):
    """Somatic tumour/normal workflow: Manta feeds candidate small indels
    into Strelka, and Strelka's SNV/indel outputs are normalised and
    indexed before being exposed."""
    self.input("normalBam", CramCrai)
    self.input("tumorBam", CramCrai)
    self.input("reference", FastaFai)
    self.input("callRegions", BedTabix(optional=True))
    self.input("exome", Boolean(optional=True), default=False)
    self.input("configStrelka", File(optional=True))
    self.step(
        "manta",
        Manta(
            bam=self.normalBam,
            tumorBam=self.tumorBam,
            reference=self.reference,
            callRegions=self.callRegions,
            exome=self.exome,
        ),
    )
    self.step(
        "strelka",
        Strelka(
            # Manta's candidate small indels are passed through to Strelka
            indelCandidates=self.manta.candidateSmallIndels,
            normalBam=self.normalBam,
            tumorBam=self.tumorBam,
            reference=self.reference,
            callRegions=self.callRegions,
            exome=self.exome,
            config=self.configStrelka,
        ),
    )
    # normalise then index each call set separately
    self.step(
        "normaliseSNVs",
        BcfToolsNorm(vcf=self.strelka.snvs, reference=self.reference),
    )
    self.step("indexSNVs", BcfToolsIndex(vcf=self.normaliseSNVs.out))
    self.step(
        "normaliseINDELs",
        BcfToolsNorm(vcf=self.strelka.indels, reference=self.reference),
    )
    self.step("indexINDELs", BcfToolsIndex(vcf=self.normaliseINDELs.out))
    self.output("diploid", source=self.manta.diploidSV)
    self.output("candIndels", source=self.manta.candidateSmallIndels)
    self.output("indels", source=self.indexINDELs.out)
    self.output("snvs", source=self.indexSNVs.out)
    self.output("somaticSVs", source=self.manta.somaticSVs)
def inputs(self):
    # DepthOfCoverage interval summary in, annotated bed alongside,
    # generated .txt name out.
    interval_summary = ToolInput(
        "inputFile",
        File(),
        prefix="-i",
        doc="Gatk3 DepthOfCoverage interval_summary output",
    )
    out_name = ToolInput(
        "outputFilename",
        Filename(extension=".txt"),
        prefix="-o",
        doc="Output file name",
    )
    annotated_bed = ToolInput("bed", Bed(), prefix="-bed", doc="Annotated bed file")
    return [interval_summary, out_name, annotated_bed]
def process_subpipeline(**connections):
    """Build the per-sample alignment subworkflow: FastQC each read pair,
    derive adaptor settings from the FastQC data, align each pair with
    BwaAligner (scattered), then merge and mark the resulting bams.

    Returns the built workflow wired up with the given connections.
    """
    w = WorkflowBuilder("somatic_subpipeline")
    w.input("reference", FastaWithDict)
    w.input("reads", Array(FastqGzPair))
    w.input("cutadapt_adapters", File(optional=True))
    w.input("sample_name", String)
    # one FastQC run per read pair
    w.step("fastqc", FastQC_0_11_5(reads=w.reads), scatter="reads")
    w.step(
        "getfastqc_adapters",
        ParseFastqcAdaptors(
            fastqc_datafiles=w.fastqc.datafile,
            cutadapt_adaptors_lookup=w.cutadapt_adapters,
        ),
        scatter="fastqc_datafiles",
    )
    w.step(
        "align_and_sort",
        BwaAligner(
            fastq=w.reads,
            reference=w.reference,
            sample_name=w.sample_name,
            sortsam_tmpDir=".",
            cutadapt_adapter=w.getfastqc_adapters,
            cutadapt_removeMiddle3Adapter=w.getfastqc_adapters,
        ),
        # the three per-pair inputs are scattered together
        scatter=[
            "fastq",
            "cutadapt_adapter",
            "cutadapt_removeMiddle3Adapter"
        ],
    )
    w.step(
        "merge_and_mark",
        MergeAndMarkBams_4_1_3(bams=w.align_and_sort.out, sampleName=w.sample_name),
    )
    w.output("out", source=w.merge_and_mark.out)
    w.output("reports", source=w.fastqc.out, output_folder=[w.sample_name, "reports"])
    return w(**connections)
def constructor(self):
    """Second-pass Strelka somatic calling: re-call using the first pass's
    indel candidates and forced SNV genotypes, then normalise and index
    the resulting indel and SNV vcfs."""
    self.input("normalBam", self.getStrelka2InputType())
    self.input("tumorBam", self.getStrelka2InputType())
    self.input("reference", FastaFai)
    self.input("callRegions", BedTabix(optional=True))
    self.input("exome", Boolean(optional=True), default=False)
    self.input("configStrelka", File(optional=True))
    self.input("indelCandidates", Array(VcfTabix))
    self.input("strelkaSNVs", Array(VcfTabix))
    # self.input("strelkaIndels", Array(VcfTabix))
    self.step(
        "strelka2pass",
        self.getStrelka2Tool()(
            indelCandidates=self.indelCandidates,
            # indelCandidates=self.strelkaIndels,
            # SNVs from the first pass are supplied as forced genotypes
            forcedgt=self.strelkaSNVs,
            normalBam=self.normalBam,
            tumorBam=self.tumorBam,
            reference=self.reference,
            callRegions=self.callRegions,
            exome=self.exome,
            config=self.configStrelka,
        ),
    )
    # normalise then index each call set separately
    self.step(
        "normaliseSNVs",
        BcfToolsNorm(vcf=self.strelka2pass.snvs, reference=self.reference),
    )
    self.step("indexSNVs", BcfToolsIndex(vcf=self.normaliseSNVs.out))
    self.step(
        "normaliseINDELs",
        BcfToolsNorm(vcf=self.strelka2pass.indels, reference=self.reference),
    )
    self.step("indexINDELs", BcfToolsIndex(vcf=self.normaliseINDELs.out))
    self.output("indels", source=self.indexINDELs.out)
    self.output("snvs", source=self.indexSNVs.out)
def inputs(self):
    """Base inputs plus the pileup table and the generated
    segmentation/contamination output filenames."""
    return [
        # Modernised from super(Gatk4CalculateContaminationBase, self):
        # zero-arg super() is equivalent, assuming this method is defined on
        # Gatk4CalculateContaminationBase itself (as the additional_args
        # reference suggests) — matches the super().inputs() style used by
        # the sibling class in this file.
        *super().inputs(),
        *Gatk4CalculateContaminationBase.additional_args,
        ToolInput(
            "pileupTable",
            File(),
            prefix="-I",
            doc="pileup table from summarize pileup",
        ),
        ToolInput(
            "segmentationFileOut",
            Filename(),
            prefix="--tumor-segmentation",
            doc="Reference sequence file",
        ),
        ToolInput("contaminationFileOut", Filename(), position=2, prefix="-O"),
    ]
def inputs(self) -> List[ToolInput]:
    # Positional command-line pieces first, then the prefixed/named inputs,
    # then the extra bwa and samtools arguments.
    positional = [
        ToolInput("reference", FastaWithDict(), position=3, shell_quote=False),
        ToolInput("reference_alt", File(), position=9, shell_quote=False),
        ToolInput("reads", FastqGzPair, position=4, shell_quote=False),
        ToolInput(
            "mates",
            FastqGzPair(optional=True),
            separator=" ",
            position=5,
            shell_quote=False,
            doc=None,
        ),
    ]
    named = [
        ToolInput(
            "outputFilename",
            Filename(prefix=InputSelector("sampleName"), extension=".bam"),
            shell_quote=False,
            prefix="-o",
            position=13,
            doc="output file name [stdout]",
        ),
        ToolInput(
            "sampleName",
            String(),
            doc="Used to construct the readGroupHeaderLine with format: "
            "'@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:ILLUMINA'",
        ),
        ToolInput(
            "platformTechnology",
            String(optional=True),
            doc="(ReadGroup: PL) Used to construct the readGroupHeaderLine, defaults: ILLUMINA",
            default="ILLUMINA",
        ),
    ]
    return (
        positional
        + named
        + list(self.bwa_additional_inputs)
        + list(self.samtools_additional_args)
    )