def outputs(self) -> List[ToolOutput]:
    """Return the four outputs whose paths derive from the "read" input.

    Each path is the "read" input with its file extension removed, followed
    by a fixed "_fastqc..." suffix: a zip file, the "fastqc_data.txt" file
    inside the "_fastqc" directory, an HTML file, and the directory itself.
    """

    def read_stem():
        # A fresh selector per output, exactly as one inline selector each.
        return InputSelector("read", remove_file_extension=True)

    zipped = ToolOutput("out", ZipFile(), glob=read_stem() + "_fastqc.zip")
    datafile = ToolOutput(
        "out_datafile", File, glob=read_stem() + "_fastqc/fastqc_data.txt"
    )
    html = ToolOutput("out_html", HtmlFile, glob=read_stem() + "_fastqc.html")
    # The directory is the only output passed via `selector=` instead of `glob=`.
    directory = ToolOutput(
        "out_directory", Directory, selector=read_stem() + "_fastqc"
    )
    return [zipped, datafile, html, directory]
def inputs(self):
    """Return the inherited inputs plus the pileup table and two output names.

    The list starts with the parent class inputs and
    ``Gatk4CalculateContaminationBase.additional_args``. Both generated
    filenames reuse the "pileupTable" input name without its extension.
    """
    return [
        *super().inputs(),
        *Gatk4CalculateContaminationBase.additional_args,
        ToolInput(
            "pileupTable",
            File(),
            prefix="-I",
            doc="pileup table from summarize pileup",
        ),
        ToolInput(
            "segmentationFileOut",
            Filename(
                prefix=InputSelector("pileupTable", remove_file_extension=True),
                extension=".mutect2_segments",
            ),
            prefix="--tumor-segmentation",
            # Previously "Reference sequence file", a copy-paste leftover that
            # did not describe the --tumor-segmentation output table.
            doc="The output table containing segmentation of the tumor by minor allele fraction",
        ),
        ToolInput(
            "contaminationFileOut",
            Filename(
                prefix=InputSelector("pileupTable", remove_file_extension=True),
                extension=".mutect2_contamination",
            ),
            position=2,
            prefix="-O",
        ),
    ]
def test_string_formatter(self):
    """Check the CWL ``valueFrom`` produced for a StringFormatter default.

    Builds a workflow whose optional "readGroupHeaderLine" input defaults to
    a StringFormatter over two other inputs, translates it to CWL, and
    compares the last step input's expression with the expected text.
    """
    workflow = WorkflowBuilder("wf")
    workflow.input("sampleName", str)
    workflow.input("platform", str)

    header_default = StringFormatter(
        "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
        name=InputSelector("sampleName"),
        pl=InputSelector("platform"),
    )
    workflow.input(
        "readGroupHeaderLine", String(optional=True), default=header_default
    )
    workflow.step("print", EchoTestTool(inp=workflow.readGroupHeaderLine))
    workflow.output("out", source=workflow.print)

    translated, _ = cwl.CwlTranslator.translate_workflow(
        workflow, with_container=False, allow_empty_container=True
    )
    step_inputs = translated.save()["steps"][0]["in"]
    self.assertEqual(4, len(step_inputs))

    actual = step_inputs[-1]["valueFrom"]
    wanted = (
        "$((inputs._print_inp_readGroupHeaderLine != null) "
        "? inputs._print_inp_readGroupHeaderLine "
        ': "@RG\\\\tID:{name}\\\\tSM:{name}\\\\tLB:{name}\\\\tPL:{pl}".replace(/\\{name\\}/g, inputs._print_inp_sampleName).replace(/\\{pl\\}/g, inputs._print_inp_platform))'
    )
    self.assertEqual(wanted, actual)
def outputs(self):
    """Return the outputs globbed from inside the "rundir" input.

    Covers the workflow config pickle and script, the run statistics TSV,
    and the somatic indel and SNV VCFs under "results/variants".
    """

    def in_rundir(relative_path):
        # New selector on every call, matching one inline selector per output.
        return InputSelector("rundir") + relative_path

    stats_doc = (
        "A tab-delimited report of various internal statistics from the variant calling process: "
        "Runtime information accumulated for each genome segment, excluding auxiliary steps such "
        "as BAM indexing and vcf merging. Indel candidacy statistics"
    )

    config_pickle = ToolOutput(
        "configPickle", File(), glob=in_rundir("/runWorkflow.py.config.pickle")
    )
    script = ToolOutput("script", File(), glob=in_rundir("/runWorkflow.py"))
    stats = ToolOutput(
        "stats",
        Tsv(),
        glob=in_rundir("/results/stats/runStats.tsv"),
        doc=stats_doc,
    )
    indels = ToolOutput(
        "indels",
        VcfTabix(),
        glob=in_rundir("/results/variants/somatic.indels.vcf.gz"),
        doc="",
    )
    snvs = ToolOutput(
        "snvs",
        VcfTabix(),
        glob=in_rundir("/results/variants/somatic.snvs.vcf.gz"),
        doc="",
    )
    return [config_pickle, script, stats, indels, snvs]
def outputs(self):
    """Return the VCF, stats, f1r2 tarball and optional BAM outputs.

    The stats file is the "outputFilename" input with ".stats" appended.
    The BAM is optional and declares its index as "^.bai".
    """
    # Shared placeholder text used verbatim by the first three outputs.
    placeholder_doc = "To determine type"

    variants = ToolOutput(
        "out", VcfTabix, glob=InputSelector("outputFilename"), doc=placeholder_doc
    )
    stats = ToolOutput(
        "stats",
        TextFile(extension=".stats"),
        glob=InputSelector("outputFilename") + ".stats",
        doc=placeholder_doc,
    )
    f1r2 = ToolOutput(
        "f1f2r_out",
        TarFileGz,
        glob=InputSelector("f1r2TarGz_outputFilename"),
        doc=placeholder_doc,
    )
    haplotype_bam = ToolOutput(
        "bam",
        BamBai(optional=True),
        glob=InputSelector("outputBamName"),
        doc="File to which assembled haplotypes should be written",
        secondaries_present_as={".bai": "^.bai"},
    )
    return [variants, stats, f1r2, haplotype_bam]
def inputs(self):
    """Return the inherited inputs, the BAM and both generated filenames.

    The parent inputs come first and
    ``Gatk4CollectInsertSizeMetricsBase.additional_args`` come last. Both
    generated filenames reuse the "bam" input name without its extension.
    """
    collected = list(super(Gatk4CollectInsertSizeMetricsBase, self).inputs())

    collected.append(
        ToolInput(
            "bam",
            BamBai(optional=False),
            prefix="-I",
            doc="Input SAM or BAM file. Required.",
            position=10,
        )
    )
    collected.append(
        ToolInput(
            "outputFilename",
            Filename(
                prefix=InputSelector("bam", remove_file_extension=True),
                extension=".txt",
                suffix=".metrics",
            ),
            prefix="-O",
            doc="File to write the output to. Required.",
        )
    )
    collected.append(
        ToolInput(
            "outputHistogram",
            Filename(
                prefix=InputSelector("bam", remove_file_extension=True),
                extension=".pdf",
                suffix=".histogram",
            ),
            prefix="-H",
            doc="File to write insert size Histogram chart to. Required. ",
        )
    )

    collected.extend(Gatk4CollectInsertSizeMetricsBase.additional_args)
    return collected
def outputs(self):
    """Return the VCF and BAM outputs located by their filename inputs."""
    variants = ToolOutput("out", Vcf(), glob=InputSelector("outputFilename"))
    assembly = ToolOutput("assembly", Bam(), glob=InputSelector("assemblyFilename"))
    return [variants, assembly]
def outputs(self) -> List[ToolOutput]:
    """Return the stdout capture, the main output file and the HTML stats file."""
    captured_stdout = ToolOutput("std", Stdout)
    main_file = ToolOutput("out", File, glob=InputSelector("outputFilename"))
    stats_file = ToolOutput(
        "stats", File(extension=".html"), glob=InputSelector("statsFile")
    )
    return [captured_stdout, main_file, stats_file]
def test_evaluate_from_input_selector_resolved(self):
    """Evaluating a formatter fills each placeholder from the given inputs."""
    name_selector = InputSelector("name")
    adjective_selector = InputSelector("adjsel")
    formatter = StringFormatter(
        "{name} is {adjective}",
        name=name_selector,
        adjective=adjective_selector,
    )

    values = {"name": "Janis", "adjsel": "pretty good"}
    evaluated = formatter.evaluate(values)

    self.assertEqual("Janis is pretty good", evaluated)
def arguments(self):
    """Return the fixed arguments for the three-stage piped command.

    The stages, in position order, are ``bwa mem``, then
    ``k8 /opt/conda/bin/bwa-postalt.js``, then ``samtools view``, joined by
    unquoted "|" arguments.
    """
    # BWA MEM command
    bwa_mem = [
        ToolArgument("bwa", position=0, shell_quote=False),
        ToolArgument("mem", position=1, shell_quote=False),
        ToolArgument(
            StringFormatter(
                "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
                name=InputSelector("sampleName"),
                pl=InputSelector("platformTechnology"),
            ),
            prefix="-R",
            position=3,
            doc="Complete read group header line.",
        ),
        ToolArgument(
            CpuSelector(),
            prefix="-t",
            position=3,
            shell_quote=False,
            doc="Number of threads. (default = 1)",
        ),
        ToolArgument("|", position=6, shell_quote=False),
    ]

    # Alt Aware Post Processing command
    post_alt = [
        ToolArgument("k8", position=7, shell_quote=False),
        ToolArgument("/opt/conda/bin/bwa-postalt.js", position=7, shell_quote=False),
    ]

    # Samtools View command
    samtools_view = [
        ToolArgument("|", position=10, shell_quote=False),
        ToolArgument("samtools", position=11, shell_quote=False),
        ToolArgument("view", position=12, shell_quote=False),
        ToolArgument(
            InputSelector("reference"),
            prefix="-T",
            position=13,
            shell_quote=False,
        ),
        ToolArgument(
            CpuSelector(),
            position=13,
            prefix="--threads",
            doc="(@) Number of additional threads to use [0]",
            shell_quote=False,
        ),
        ToolArgument(
            "-h",
            position=13,
            shell_quote=False,
            doc="Include header in the output",
        ),
        ToolArgument(
            "-b",
            position=13,
            shell_quote=False,
            doc="Output in the BAM format.",
        ),
    ]

    return [*bwa_mem, *post_alt, *samtools_view]
def outputs(self):
    """Return the two files globbed from inside the "outdir" input."""
    abundance = ToolOutput(
        "out", File, glob=InputSelector("outdir") + "/abundance.tsv"
    )
    run_info = ToolOutput(
        "stats", File, glob=InputSelector("outdir") + "/run_info.json"
    )
    return [abundance, run_info]
def outputs(self):
    """Return the text output and the PDF histogram output."""
    text_out = ToolOutput("out", TextFile(), glob=InputSelector("outputFilename"))
    histogram = ToolOutput(
        "outHistogram",
        File(extension=".pdf"),
        glob=InputSelector("outputHistogram"),
    )
    return [text_out, histogram]
def outputs(self):
    """Return the gene and region text files named by their inputs."""
    gene_file = ToolOutput(
        "geneFileOut", TextFile(), glob=InputSelector("outputGeneFile")
    )
    region_file = ToolOutput(
        "regionFileOut", TextFile(), glob=InputSelector("outputRegionFile")
    )
    return [gene_file, region_file]
def outputs(self):
    """Return the main and discarded TSV outputs, both located via `selector=`."""
    kept = ToolOutput("out", Tsv, selector=InputSelector("output_filename"))
    discarded = ToolOutput(
        "out_discarded",
        Tsv,
        selector=InputSelector("discarded_output_filename"),
    )
    return [kept, discarded]
def test_dotproduct_string_formatter(self):
    """Two equal-length list inputs are combined element by element."""
    formatter = StringFormatter(
        "iteration_{i}_{j}",
        i=InputSelector("it1"),
        j=InputSelector("it2"),
    )
    results = formatter.evaluate({"it1": [1, 2, 3], "it2": ["a", "b", "c"]})

    self.assertEqual(3, len(results))
    wanted = ("iteration_1_a", "iteration_2_b", "iteration_3_c")
    for position, text in enumerate(wanted):
        self.assertEqual(text, results[position])
def outputs(self):
    """Return the indexed BAM (index declared as "^.bai") and the metrics TSV."""
    indexed_bam = ToolOutput(
        "out",
        BamBai,
        glob=InputSelector("outputFilename"),
        secondaries_present_as={".bai": "^.bai"},
    )
    metrics = ToolOutput("metrics", Tsv(), glob=InputSelector("metricsFilename"))
    return [indexed_bam, metrics]
def outputs(self) -> List[ToolOutput]:
    """Return the VCF and the indexed assembly BAM (index declared as "^.bai")."""
    variants = ToolOutput("vcf", Vcf(), glob=InputSelector("outputFilename"))
    assembly = ToolOutput(
        "assembly",
        BamBai(),
        glob=InputSelector("assemblyFilename"),
        secondaries_present_as={".bai": "^.bai"},
    )
    return [variants, assembly]
def outputs(self) -> List[ToolOutput]:
    """Return the outputs globbed from inside the "runDir" input.

    Covers the workflow script and config pickle, three variant VCFs, three
    statistics files, and two VCFs that are declared optional.
    """

    def in_run_dir(relative_path):
        # New selector on every call, matching one inline selector per output.
        return InputSelector("runDir") + relative_path

    required = [
        ToolOutput("python", File(), glob=in_run_dir("/runWorkflow.py")),
        ToolOutput(
            "pickle", File(), glob=in_run_dir("/runWorkflow.py.config.pickle")
        ),
        ToolOutput(
            "candidateSV",
            VcfTabix(),
            glob=in_run_dir("/results/variants/candidateSV.vcf.gz"),
        ),
        ToolOutput(
            "candidateSmallIndels",
            VcfTabix(),
            glob=in_run_dir("/results/variants/candidateSmallIndels.vcf.gz"),
        ),
        ToolOutput(
            "diploidSV",
            VcfTabix(),
            glob=in_run_dir("/results/variants/diploidSV.vcf.gz"),
        ),
        ToolOutput(
            "alignmentStatsSummary",
            File(),
            glob=in_run_dir("/results/stats/alignmentStatsSummary.txt"),
        ),
        ToolOutput(
            "svCandidateGenerationStats",
            Tsv(),
            glob=in_run_dir("/results/stats/svCandidateGenerationStats.tsv"),
        ),
        ToolOutput(
            "svLocusGraphStats",
            Tsv(),
            glob=in_run_dir("/results/stats/svLocusGraphStats.tsv"),
        ),
    ]

    # optional outputs
    optional = [
        ToolOutput(
            "somaticSV",
            VcfTabix(optional=True),
            glob=in_run_dir("/results/variants/somaticSV.vcf.gz"),
        ),
        ToolOutput(
            "tumorSV",
            VcfTabix(optional=True),
            glob=in_run_dir("/results/variants/tumorSV.vcf.gz"),
        ),
    ]

    return required + optional
def inputs(self):
    """Return the inherited inputs followed by the inputs declared here.

    The parent inputs and ``Gatk4HaplotypeCallerBase.optional_args`` come
    first. Both generated filenames reuse the "inputRead" input name without
    its extension.
    """
    collected = list(super(Gatk4HaplotypeCallerBase, self).inputs())
    collected.extend(Gatk4HaplotypeCallerBase.optional_args)

    collected.append(
        ToolInput(
            "inputRead",
            BamBai(),
            doc="BAM/SAM/CRAM file containing reads",
            prefix="--input",
            secondaries_present_as={".bai": "^.bai"},
        )
    )
    collected.append(
        ToolInput(
            "reference",
            FastaWithDict(),
            position=5,
            prefix="--reference",
            doc="Reference sequence file",
        )
    )
    collected.append(
        ToolInput(
            "outputFilename",
            Filename(
                prefix=InputSelector("inputRead", remove_file_extension=True),
                extension=".vcf.gz",
            ),
            position=8,
            prefix="--output",
            doc="File to which variants should be written",
        )
    )
    collected.append(
        ToolInput(
            "dbsnp",
            VcfTabix(optional=True),
            position=7,
            prefix="--dbsnp",
            doc="(Also: -D) A dbSNP VCF file.",
        )
    )
    collected.append(
        ToolInput(
            "intervals",
            Bed(optional=True),
            prefix="--intervals",
            doc="-L (BASE) One or more genomic intervals over which to operate",
        )
    )
    collected.append(
        ToolInput(
            "outputBamName",
            Filename(
                prefix=InputSelector("inputRead", remove_file_extension=True),
                extension=".bam",
            ),
            position=8,
            prefix="-bamout",
            doc="File to which assembled haplotypes should be written",
        )
    )
    return collected
def outputs(self) -> List[ToolOutput]:
    """Return the single paired-FASTQ output.

    The pair is selected as two paths, each the "outputPrefix" input followed
    by "-R1.fastq.gz" or "-R2.fastq.gz".
    """
    first_read = InputSelector("outputPrefix") + "-R1.fastq.gz"
    second_read = InputSelector("outputPrefix") + "-R2.fastq.gz"
    return [ToolOutput("out", FastqGzPair, selector=[first_read, second_read])]
def outputs(self):
    """Return the outputs named by "outputPrefix" plus a fixed suffix.

    The summary and the purity/hisens png, seg and rds files are required;
    the arm-level, gene-level and qc text files are declared optional.
    """

    def prefixed(suffix):
        # New selector on every call, matching one inline selector per output.
        return InputSelector("outputPrefix") + suffix

    summary = ToolOutput("out_summary", File(), glob=prefixed(".txt"))
    purity_png = ToolOutput("out_purity_png", File(), glob=prefixed("_purity.png"))
    purity_seg = ToolOutput("out_purity_seg", File(), glob=prefixed("_purity.seg"))
    purity_rds = ToolOutput("out_purity_rds", File(), glob=prefixed("_purity.rds"))
    hisens_png = ToolOutput("out_hisens_png", File(), glob=prefixed("_hisens.png"))
    hisens_seg = ToolOutput("out_hisens_seg", File(), glob=prefixed("_hisens.seg"))
    hisens_rds = ToolOutput("out_hisens_rds", File(), glob=prefixed("_hisens.rds"))
    arm_level = ToolOutput(
        "out_arm_level", File(optional=True), glob=prefixed(".arm_level.txt")
    )
    gene_level = ToolOutput(
        "out_gene_level", File(optional=True), glob=prefixed(".gene_level.txt")
    )
    qc = ToolOutput("out_qc", File(optional=True), glob=prefixed(".qc.txt"))

    return [
        summary,
        purity_png,
        purity_seg,
        purity_rds,
        hisens_png,
        hisens_seg,
        hisens_rds,
        arm_level,
        gene_level,
        qc,
    ]
def test_string_formatter_two_param(self):
    """Check the CWL expression produced for a two-placeholder formatter.

    Each placeholder is expected to become a chained JavaScript ``.replace``
    call that substitutes the matching ``inputs.<name>`` value.
    """
    # vardict input format
    b = StringFormatter(
        "{tumorName}:{normalName}",
        tumorName=InputSelector("tumorInputName"),
        normalName=InputSelector("normalInputName"),
    )
    res = cwl.get_input_value_from_potential_selector_or_generator(b)
    # Raw literals: "\{" and "\}" are not valid Python escapes, so a plain
    # string triggers an invalid-escape warning. The value is unchanged.
    expected = (
        r'$("{tumorName}:{normalName}"'
        r".replace(/\{tumorName\}/g, inputs.tumorInputName)"
        r".replace(/\{normalName\}/g, inputs.normalInputName))"
    )
    self.assertEqual(expected, res)
def outputs(self) -> List[ToolOutput]:
    """Return the stdout capture and three optional file outputs.

    The VCF, HTML stats file and text warnings file are each located through
    `selector=` on the matching filename input.
    """
    variants = ToolOutput(
        "out", VcfTabix(optional=True), selector=InputSelector("outputFilename")
    )
    captured_stdout = ToolOutput("out_stdout", Stdout)
    stats = ToolOutput(
        "out_stats",
        File(optional=True, extension=".html"),
        selector=InputSelector("statsFile"),
    )
    warnings_file = ToolOutput(
        "out_warnings",
        File(optional=True, extension=".txt"),
        selector=InputSelector("warningFile"),
    )
    return [variants, captured_stdout, stats, warnings_file]
def test_string_formatter_two_param(self):
    """Check the WDL string produced for a two-placeholder formatter."""
    # vardict input format
    tool_inputs = {
        "tumorInputName": ToolInput("tumorInputName", String()),
        "normalInputName": ToolInput("normalInputName", String()),
    }
    formatter = StringFormatter(
        "{tumorName}:{normalName}",
        tumorName=InputSelector("tumorInputName"),
        normalName=InputSelector("normalInputName"),
    )

    translated = wdl.get_input_value_from_potential_selector_or_generator(
        formatter, tool_inputs
    )

    self.assertEqual("${tumorInputName}:${normalInputName}", translated)
def outputs(self) -> List[ToolOutput]:
    """Return the single PDF output.

    Its glob is "<output_dir>/<tumor_name>--<normal_name>.pdf", assembled
    from the three inputs of those names.
    """
    pdf_path = StringFormatter(
        "{output_dir}/{tumor_name}--{normal_name}.pdf",
        output_dir=InputSelector("output_dir"),
        tumor_name=InputSelector("tumor_name"),
        normal_name=InputSelector("normal_name"),
    )
    return [ToolOutput("out", File, glob=pdf_path)]
def arguments(self):
    """Return the fixed arguments for ``bwa mem`` piped into ``samtools view``.

    The list is written with the samtools arguments first; the ``position``
    values place the ``bwa mem`` options (2) ahead of the pipe (5) and the
    samtools arguments (6-8).
    """
    return [
        ToolArgument("bwa", position=0, shell_quote=False),
        ToolArgument("mem", position=1, shell_quote=False),
        ToolArgument("|", position=5, shell_quote=False),
        ToolArgument("samtools", position=6, shell_quote=False),
        ToolArgument("view", position=7, shell_quote=False),
        ToolArgument(
            InputSelector("reference"),
            prefix="-T",
            position=8,
            shell_quote=False,
        ),
        ToolArgument(
            CpuSelector(),
            position=8,
            shell_quote=False,
            prefix="--threads",
            doc="(-@) Number of additional threads to use [0]",
        ),
        ToolArgument(
            "-h",
            position=8,
            shell_quote=False,
            doc="Include the header in the output.",
        ),
        ToolArgument(
            "-b",
            position=8,
            shell_quote=False,
            doc="Output in the BAM format.",
        ),
        ToolArgument(
            StringFormatter(
                "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
                name=InputSelector("sampleName"),
                pl=InputSelector("platformTechnology"),
            ),
            prefix="-R",
            position=2,
            # The first fragment now ends with a space; the adjacent literals
            # previously joined as "TABin the output SAM".
            doc="Complete read group header line. ’\\t’ can be used in STR and will be converted to a TAB "
            "in the output SAM. The read group ID will be attached to every read in the output. "
            "An example is ’@RG\\tID:foo\\tSM:bar’. (Default=null) "
            "https://gatkforums.broadinstitute.org/gatk/discussion/6472/read-groups",
        ),
        ToolArgument(
            CpuSelector(),
            prefix="-t",
            position=2,
            shell_quote=False,
            doc="Number of threads. (default = 1)",
        ),
    ]
def outputs(self):
    """Return the contamination and segmentation files named by their inputs."""
    contamination = ToolOutput(
        "contOut",
        File(),
        glob=InputSelector("contaminationFileOut"),
        doc="contamination Table",
    )
    segmentation = ToolOutput(
        "segOut",
        File(),
        glob=InputSelector("segmentationFileOut"),
        doc="segmentation based on baf",
    )
    return [contamination, segmentation]
def test_string_formatter_optional_inpselect_no_default(self):
    """Translating a formatter over an optional input with no default raises."""
    # will throw
    tool_inputs = {"ti": ToolInput("ti", String(optional=True))}
    formatter = StringFormatter("{place} michael", place=InputSelector("ti"))

    with self.assertRaises(Exception):
        wdl.get_input_value_from_potential_selector_or_generator(
            formatter, tool_inputs
        )
def test_input_selector_base_nostringenv(self):
    """Outside a string environment the selector translates to the bare name."""
    tool_inputs = {"random": ToolInput("random", String())}
    selector = InputSelector("random")

    translated = wdl.translate_input_selector(
        selector, tool_inputs, string_environment=False
    )

    self.assertEqual("random", translated)
def inputs(self):
    """Return the inputs declared here, then the inherited and additional ones.

    Both generated filenames share one prefix: the first available of the
    optional "outputPrefix" input and the literal "generated".
    """
    # Would be good to include this in the prefix:
    #   If(InputSelector("bam").length().equals(1), InputSelector("bam")[0].basename(), None)
    prefix = FirstOperator([InputSelector("outputPrefix"), "generated"])
    return [
        ToolInput(
            "bam",
            Array(Bam),
            prefix="-I",
            position=10,
            # secondaries_present_as={".bai": "^.bai"},
            doc="One or more input SAM or BAM files to analyze. Must be coordinate sorted.",
        ),
        ToolInput("outputPrefix", String(optional=True)),
        ToolInput(
            "outputFilename",
            Filename(prefix=prefix, suffix=".markduped", extension=".bam"),
            position=10,
            prefix="-O",
            # This doc and the one on "metricsFilename" were swapped: -O is
            # the ".markduped.bam" file, -M is the ".metrics.txt" file.
            doc="The output file to write marked records to.",
        ),
        ToolInput(
            "metricsFilename",
            Filename(prefix=prefix, suffix=".metrics", extension=".txt"),
            position=10,
            prefix="-M",
            doc="File to write duplication metrics to",
        ),
        *super().inputs(),
        *self.additional_args,
    ]