Esempio n. 1
0
 def outputs(self) -> List[ToolOutput]:
     """Declare the outputs, all named after the ``read`` input.

     Each location starts from the ``read`` input selector (built with
     ``remove_file_extension=True``) and appends a fixed ``_fastqc`` suffix.
     """

     def read_stem():
         # Build a new selector for every output, as the inline form would.
         return InputSelector("read", remove_file_extension=True)

     archive = ToolOutput("out", ZipFile(), glob=read_stem() + "_fastqc.zip")
     data_file = ToolOutput(
         "out_datafile", File, glob=read_stem() + "_fastqc/fastqc_data.txt"
     )
     report = ToolOutput("out_html", HtmlFile, glob=read_stem() + "_fastqc.html")
     # The directory output is located with `selector=` rather than `glob=`.
     folder = ToolOutput(
         "out_directory", Directory, selector=read_stem() + "_fastqc"
     )
     return [archive, data_file, report, folder]
Esempio n. 2
0
 def inputs(self):
     """Return the inherited inputs plus the pileup table and output names.

     Both generated filenames are prefixed with the ``pileupTable`` input
     (file extension removed) and differ only in their extension.
     """
     return [
         *super().inputs(),
         *Gatk4CalculateContaminationBase.additional_args,
         ToolInput(
             "pileupTable",
             File(),
             prefix="-I",
             doc="pileup table from summarize pileup",
         ),
         ToolInput(
             "segmentationFileOut",
             Filename(
                 prefix=InputSelector("pileupTable",
                                      remove_file_extension=True),
                 extension=".mutect2_segments",
             ),
             prefix="--tumor-segmentation",
             # Was "Reference sequence file", a copy-paste leftover that did
             # not describe the --tumor-segmentation output at all.
             doc="Output table containing segmentation of the tumor "
             "by minor allele fraction",
         ),
         ToolInput(
             "contaminationFileOut",
             Filename(
                 prefix=InputSelector("pileupTable",
                                      remove_file_extension=True),
                 extension=".mutect2_contamination",
             ),
             position=2,
             prefix="-O",
         ),
     ]
Esempio n. 3
0
    def test_string_formatter(self):
        """Translate a workflow whose optional input defaults to a formatter.

        Checks the ``valueFrom`` expression of the last step input in the
        document returned by ``CwlTranslator.translate_workflow``.
        """
        workflow = WorkflowBuilder("wf")
        workflow.input("sampleName", str)
        workflow.input("platform", str)

        header_default = StringFormatter(
            "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
            name=InputSelector("sampleName"),
            pl=InputSelector("platform"),
        )
        workflow.input(
            "readGroupHeaderLine", String(optional=True), default=header_default
        )
        workflow.step("print", EchoTestTool(inp=workflow.readGroupHeaderLine))
        workflow.output("out", source=workflow.print)

        translated, _ = cwl.CwlTranslator.translate_workflow(
            workflow, with_container=False, allow_empty_container=True
        )
        step_inputs = translated.save()["steps"][0]["in"]
        self.assertEqual(4, len(step_inputs))

        expected_parts = [
            "$((inputs._print_inp_readGroupHeaderLine != null) ",
            "? inputs._print_inp_readGroupHeaderLine ",
            ': "@RG\\\\tID:{name}\\\\tSM:{name}\\\\tLB:{name}\\\\tPL:{pl}".replace(/\\{name\\}/g, inputs._print_inp_sampleName).replace(/\\{pl\\}/g, inputs._print_inp_platform))',
        ]
        self.assertEqual("".join(expected_parts), step_inputs[-1]["valueFrom"])
Esempio n. 4
0
 def outputs(self):
     """List the files collected from beneath the ``rundir`` input."""

     def in_rundir(relative_path):
         # Location of `relative_path` under the directory named by `rundir`.
         return InputSelector("rundir") + relative_path

     stats_doc = (
         "A tab-delimited report of various internal statistics from the variant calling process: "
         "Runtime information accumulated for each genome segment, excluding auxiliary steps such "
         "as BAM indexing and vcf merging. Indel candidacy statistics"
     )

     config_pickle = ToolOutput(
         "configPickle", File(), glob=in_rundir("/runWorkflow.py.config.pickle")
     )
     script = ToolOutput("script", File(), glob=in_rundir("/runWorkflow.py"))
     stats = ToolOutput(
         "stats",
         Tsv(),
         glob=in_rundir("/results/stats/runStats.tsv"),
         doc=stats_doc,
     )
     indels = ToolOutput(
         "indels",
         VcfTabix(),
         glob=in_rundir("/results/variants/somatic.indels.vcf.gz"),
         doc="",
     )
     snvs = ToolOutput(
         "snvs",
         VcfTabix(),
         glob=in_rundir("/results/variants/somatic.snvs.vcf.gz"),
         doc="",
     )
     return [config_pickle, script, stats, indels, snvs]
Esempio n. 5
0
 def outputs(self):
     """Declare four outputs, each located through an input selector."""
     variants = ToolOutput(
         "out", VcfTabix, glob=InputSelector("outputFilename"), doc="To determine type"
     )
     # The stats file sits next to the main output with ".stats" appended.
     stats = ToolOutput(
         "stats",
         TextFile(extension=".stats"),
         glob=InputSelector("outputFilename") + ".stats",
         doc="To determine type",
     )
     f1r2_archive = ToolOutput(
         "f1f2r_out",
         TarFileGz,
         glob=InputSelector("f1r2TarGz_outputFilename"),
         doc="To determine type",
     )
     haplotype_bam = ToolOutput(
         "bam",
         BamBai(optional=True),
         glob=InputSelector("outputBamName"),
         doc="File to which assembled haplotypes should be written",
         secondaries_present_as={".bai": "^.bai"},
     )
     return [variants, stats, f1r2_archive, haplotype_bam]
Esempio n. 6
0
 def inputs(self):
     """Return the inherited inputs, the BAM input and two generated names.

     Both generated filenames use the ``bam`` input (extension removed) as
     their prefix; the shared ``additional_args`` come last.
     """

     def bam_derived(suffix, extension):
         # Filename built from the `bam` input name without its extension.
         return Filename(
             prefix=InputSelector("bam", remove_file_extension=True),
             extension=extension,
             suffix=suffix,
         )

     inherited = super(Gatk4CollectInsertSizeMetricsBase, self).inputs()
     own = [
         ToolInput(
             "bam",
             BamBai(optional=False),
             prefix="-I",
             doc="Input SAM or BAM file.  Required.",
             position=10,
         ),
         ToolInput(
             "outputFilename",
             bam_derived(".metrics", ".txt"),
             prefix="-O",
             doc="File to write the output to.  Required.",
         ),
         ToolInput(
             "outputHistogram",
             bam_derived(".histogram", ".pdf"),
             prefix="-H",
             doc="File to write insert size Histogram chart to.  Required. ",
         ),
     ]
     return [
         *inherited,
         *own,
         *Gatk4CollectInsertSizeMetricsBase.additional_args,
     ]
Esempio n. 7
0
 def outputs(self):
     """Declare the VCF and assembly BAM outputs, located by input name."""
     variants = ToolOutput("out", Vcf(), glob=InputSelector("outputFilename"))
     assembly = ToolOutput(
         "assembly", Bam(), glob=InputSelector("assemblyFilename")
     )
     return [variants, assembly]
Esempio n. 8
0
 def outputs(self) -> List[ToolOutput]:
     """Declare the stdout capture plus the main and stats file outputs."""
     captured_stdout = ToolOutput("std", Stdout)
     main_file = ToolOutput("out", File, glob=InputSelector("outputFilename"))
     stats_page = ToolOutput(
         "stats",
         File(extension=".html"),
         glob=InputSelector("statsFile"),
     )
     return [captured_stdout, main_file, stats_page]
Esempio n. 9
0
 def test_evaluate_from_input_selector_resolved(self):
     """Evaluate a formatter whose placeholders are both input selectors."""
     formatter = StringFormatter(
         "{name} is {adjective}",
         name=InputSelector("name"),
         adjective=InputSelector("adjsel"),
     )
     resolved = formatter.evaluate({"name": "Janis", "adjsel": "pretty good"})
     self.assertEqual("Janis is pretty good", resolved)
Esempio n. 10
0
 def arguments(self):
     """Return the fixed command-line arguments in declaration order.

     The literal tokens spell out ``bwa mem | k8 bwa-postalt.js | samtools
     view``; every argument except the ``-R`` read group header is emitted
     with ``shell_quote=False``.
     """

     def unquoted(value, position, **extra):
         # Shorthand for an argument that opts out of shell quoting.
         return ToolArgument(value, position=position, shell_quote=False, **extra)

     bwa_mem = [
         unquoted("bwa", 0),
         unquoted("mem", 1),
         # The read group header is the one argument left with default quoting.
         ToolArgument(
             StringFormatter(
                 "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
                 name=InputSelector("sampleName"),
                 pl=InputSelector("platformTechnology"),
             ),
             prefix="-R",
             position=3,
             doc="Complete read group header line.",
         ),
         unquoted(
             CpuSelector(), 3, prefix="-t", doc="Number of threads. (default = 1)"
         ),
     ]
     alt_aware_post_processing = [
         unquoted("|", 6),
         unquoted("k8", 7),
         unquoted("/opt/conda/bin/bwa-postalt.js", 7),
     ]
     samtools_view = [
         unquoted("|", 10),
         unquoted("samtools", 11),
         unquoted("view", 12),
         unquoted(InputSelector("reference"), 13, prefix="-T"),
         unquoted(
             CpuSelector(),
             13,
             prefix="--threads",
             doc="(@) Number of additional threads to use [0]",
         ),
         unquoted("-h", 13, doc="Include header in the output"),
         unquoted("-b", 13, doc="Output in the BAM format."),
     ]
     return bwa_mem + alt_aware_post_processing + samtools_view
Esempio n. 11
0
 def outputs(self):
     """Declare two file outputs found inside the ``outdir`` input."""
     located = (
         ("out", "/abundance.tsv"),
         ("stats", "/run_info.json"),
     )
     return [
         ToolOutput(tag, File, glob=InputSelector("outdir") + relative_path)
         for tag, relative_path in located
     ]
Esempio n. 12
0
 def outputs(self):
     """Declare the text output and the PDF histogram output."""
     text_report = ToolOutput(
         "out", TextFile(), glob=InputSelector("outputFilename")
     )
     histogram = ToolOutput(
         "outHistogram",
         File(extension=".pdf"),
         glob=InputSelector("outputHistogram"),
     )
     return [text_report, histogram]
Esempio n. 13
0
 def outputs(self):
     """Declare the gene and region text outputs, located by input name."""
     gene_file = ToolOutput(
         "geneFileOut", TextFile(), glob=InputSelector("outputGeneFile")
     )
     region_file = ToolOutput(
         "regionFileOut", TextFile(), glob=InputSelector("outputRegionFile")
     )
     return [gene_file, region_file]
Esempio n. 14
0
 def outputs(self):
     """Declare the kept and discarded TSV outputs via ``selector=``."""
     kept = ToolOutput("out", Tsv, selector=InputSelector("output_filename"))
     discarded = ToolOutput(
         "out_discarded",
         Tsv,
         selector=InputSelector("discarded_output_filename"),
     )
     return [kept, discarded]
Esempio n. 15
0
    def test_dotproduct_string_formatter(self):
        """Evaluate a formatter over two equal-length list inputs.

        The assertions expect one result per index, pairing the i-th element
        of ``it1`` with the i-th element of ``it2``.
        """
        formatter = StringFormatter(
            "iteration_{i}_{j}",
            i=InputSelector("it1"),
            j=InputSelector("it2"),
        )
        results = formatter.evaluate({"it1": [1, 2, 3], "it2": ["a", "b", "c"]})

        self.assertEqual(3, len(results))
        wanted = ("iteration_1_a", "iteration_2_b", "iteration_3_c")
        for index, text in enumerate(wanted):
            self.assertEqual(text, results[index])
Esempio n. 16
0
 def outputs(self):
     """Declare the indexed BAM output and the TSV metrics output."""
     indexed_bam = ToolOutput(
         "out",
         BamBai,
         glob=InputSelector("outputFilename"),
         secondaries_present_as={".bai": "^.bai"},
     )
     metrics = ToolOutput("metrics", Tsv(), glob=InputSelector("metricsFilename"))
     return [indexed_bam, metrics]
Esempio n. 17
0
 def outputs(self) -> List[ToolOutput]:
     """Declare the VCF output and the indexed assembly BAM output."""
     variants = ToolOutput("vcf", Vcf(), glob=InputSelector("outputFilename"))
     assembly = ToolOutput(
         "assembly",
         BamBai(),
         glob=InputSelector("assemblyFilename"),
         secondaries_present_as={".bai": "^.bai"},
     )
     return [variants, assembly]
Esempio n. 18
0
 def outputs(self) -> List[ToolOutput]:
     """List the files collected from beneath the ``runDir`` input.

     The last two outputs are declared with ``optional=True``.
     """

     def under_run_dir(relative_path):
         # Location of `relative_path` under the directory named by `runDir`.
         return InputSelector("runDir") + relative_path

     workflow_files = [
         ToolOutput("python", File(), glob=under_run_dir("/runWorkflow.py")),
         ToolOutput(
             "pickle", File(), glob=under_run_dir("/runWorkflow.py.config.pickle")
         ),
     ]
     variants = [
         ToolOutput(
             "candidateSV",
             VcfTabix(),
             glob=under_run_dir("/results/variants/candidateSV.vcf.gz"),
         ),
         ToolOutput(
             "candidateSmallIndels",
             VcfTabix(),
             glob=under_run_dir("/results/variants/candidateSmallIndels.vcf.gz"),
         ),
         ToolOutput(
             "diploidSV",
             VcfTabix(),
             glob=under_run_dir("/results/variants/diploidSV.vcf.gz"),
         ),
     ]
     stats = [
         ToolOutput(
             "alignmentStatsSummary",
             File(),
             glob=under_run_dir("/results/stats/alignmentStatsSummary.txt"),
         ),
         ToolOutput(
             "svCandidateGenerationStats",
             Tsv(),
             glob=under_run_dir("/results/stats/svCandidateGenerationStats.tsv"),
         ),
         ToolOutput(
             "svLocusGraphStats",
             Tsv(),
             glob=under_run_dir("/results/stats/svLocusGraphStats.tsv"),
         ),
     ]
     # optional outputs
     optional_variants = [
         ToolOutput(
             "somaticSV",
             VcfTabix(optional=True),
             glob=under_run_dir("/results/variants/somaticSV.vcf.gz"),
         ),
         ToolOutput(
             "tumorSV",
             VcfTabix(optional=True),
             glob=under_run_dir("/results/variants/tumorSV.vcf.gz"),
         ),
     ]
     return workflow_files + variants + stats + optional_variants
Esempio n. 19
0
 def inputs(self):
     """Return the inherited and optional inputs followed by this tool's own.

     The two generated filenames (``outputFilename`` and ``outputBamName``)
     both use the ``inputRead`` input, extension removed, as their prefix.
     """

     def read_derived(extension):
         # Filename built from the `inputRead` name without its extension.
         return Filename(
             prefix=InputSelector("inputRead", remove_file_extension=True),
             extension=extension,
         )

     return [
         *super(Gatk4HaplotypeCallerBase, self).inputs(),
         *Gatk4HaplotypeCallerBase.optional_args,
         ToolInput(
             "inputRead",
             BamBai(),
             doc="BAM/SAM/CRAM file containing reads",
             prefix="--input",
             secondaries_present_as={".bai": "^.bai"},
         ),
         ToolInput(
             "reference",
             FastaWithDict(),
             position=5,
             prefix="--reference",
             doc="Reference sequence file",
         ),
         ToolInput(
             "outputFilename",
             read_derived(".vcf.gz"),
             position=8,
             prefix="--output",
             doc="File to which variants should be written",
         ),
         ToolInput(
             "dbsnp",
             VcfTabix(optional=True),
             position=7,
             prefix="--dbsnp",
             doc="(Also: -D) A dbSNP VCF file.",
         ),
         ToolInput(
             "intervals",
             Bed(optional=True),
             prefix="--intervals",
             doc="-L (BASE) One or more genomic intervals over which to operate",
         ),
         ToolInput(
             "outputBamName",
             read_derived(".bam"),
             position=8,
             prefix="-bamout",
             doc="File to which assembled haplotypes should be written",
         ),
     ]
Esempio n. 20
0
 def outputs(self) -> List[ToolOutput]:
     """Declare one paired output selected by two ``outputPrefix`` paths."""
     read_endings = ("-R1.fastq.gz", "-R2.fastq.gz")
     pair_selector = [
         InputSelector("outputPrefix") + ending for ending in read_endings
     ]
     return [ToolOutput("out", FastqGzPair, selector=pair_selector)]
Esempio n. 21
0
 def outputs(self):
     """Declare every output as ``outputPrefix`` plus a fixed ending.

     The first seven outputs use a plain ``File()``; the last three are
     declared with ``File(optional=True)``.
     """
     required_endings = (
         ("out_summary", ".txt"),
         ("out_purity_png", "_purity.png"),
         ("out_purity_seg", "_purity.seg"),
         ("out_purity_rds", "_purity.rds"),
         ("out_hisens_png", "_hisens.png"),
         ("out_hisens_seg", "_hisens.seg"),
         ("out_hisens_rds", "_hisens.rds"),
     )
     optional_endings = (
         ("out_arm_level", ".arm_level.txt"),
         ("out_gene_level", ".gene_level.txt"),
         ("out_qc", ".qc.txt"),
     )

     declared = [
         ToolOutput(tag, File(), glob=InputSelector("outputPrefix") + ending)
         for tag, ending in required_endings
     ]
     declared.extend(
         ToolOutput(
             tag,
             File(optional=True),
             glob=InputSelector("outputPrefix") + ending,
         )
         for tag, ending in optional_endings
     )
     return declared
Esempio n. 22
0
 def test_string_formatter_two_param(self):
     """Translate a two-placeholder formatter into its CWL expression."""
     # vardict input format
     b = StringFormatter(
         "{tumorName}:{normalName}",
         tumorName=InputSelector("tumorInputName"),
         normalName=InputSelector("normalInputName"),
     )
     res = cwl.get_input_value_from_potential_selector_or_generator(b)
     # The backslashes are doubled so the literal holds the same text without
     # relying on the invalid escape sequences "\{" and "\}", which newer
     # Python versions warn about and plan to reject.
     expected = (
         '$("{tumorName}:{normalName}"'
         ".replace(/\\{tumorName\\}/g, inputs.tumorInputName)"
         ".replace(/\\{normalName\\}/g, inputs.normalInputName))"
     )
     self.assertEqual(expected, res)
Esempio n. 23
0
 def outputs(self) -> List[ToolOutput]:
     """Declare the optional file outputs and the stdout capture."""
     variants = ToolOutput(
         "out",
         VcfTabix(optional=True),
         selector=InputSelector("outputFilename"),
     )
     captured_stdout = ToolOutput("out_stdout", Stdout)
     stats_page = ToolOutput(
         "out_stats",
         File(optional=True, extension=".html"),
         selector=InputSelector("statsFile"),
     )
     warnings_file = ToolOutput(
         "out_warnings",
         File(optional=True, extension=".txt"),
         selector=InputSelector("warningFile"),
     )
     return [variants, captured_stdout, stats_page, warnings_file]
Esempio n. 24
0
 def test_string_formatter_two_param(self):
     """Translate a two-placeholder formatter into its WDL string form."""
     # vardict input format
     tool_inputs = {
         key: ToolInput(key, String())
         for key in ("tumorInputName", "normalInputName")
     }
     formatter = StringFormatter(
         "{tumorName}:{normalName}",
         tumorName=InputSelector("tumorInputName"),
         normalName=InputSelector("normalInputName"),
     )
     translated = wdl.get_input_value_from_potential_selector_or_generator(
         formatter, tool_inputs
     )
     self.assertEqual("${tumorInputName}:${normalInputName}", translated)
 def outputs(self) -> List[ToolOutput]:
     """Declare one file output whose glob is formatted from three inputs."""
     pdf_location = StringFormatter(
         "{output_dir}/{tumor_name}--{normal_name}.pdf",
         output_dir=InputSelector("output_dir"),
         tumor_name=InputSelector("tumor_name"),
         normal_name=InputSelector("normal_name"),
     )
     return [ToolOutput("out", File, glob=pdf_location)]
Esempio n. 26
0
 def arguments(self):
     """Return the fixed command-line arguments for this tool.

     The literal tokens spell out ``bwa mem | samtools view``; each argument
     carries an explicit ``position`` and every one except the ``-R`` read
     group header is emitted with ``shell_quote=False``.
     """
     return [
         ToolArgument("bwa", position=0, shell_quote=False),
         ToolArgument("mem", position=1, shell_quote=False),
         ToolArgument("|", position=5, shell_quote=False),
         ToolArgument("samtools", position=6, shell_quote=False),
         ToolArgument("view", position=7, shell_quote=False),
         ToolArgument(InputSelector("reference"),
                      prefix="-T",
                      position=8,
                      shell_quote=False),
         ToolArgument(
             CpuSelector(),
             position=8,
             shell_quote=False,
             prefix="--threads",
             doc="(-@)  Number of additional threads to use [0]",
         ),
         ToolArgument(
             "-h",
             position=8,
             shell_quote=False,
             doc="Include the header in the output.",
         ),
         ToolArgument("-b",
                      position=8,
                      shell_quote=False,
                      doc="Output in the BAM format."),
         ToolArgument(
             StringFormatter(
                 "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
                 name=InputSelector("sampleName"),
                 pl=InputSelector("platformTechnology"),
             ),
             prefix="-R",
             position=2,
             # The first fragment now ends with a space; without it the
             # implicit concatenation rendered "TABin the output SAM".
             doc=
             "Complete read group header line. ’\\t’ can be used in STR and will be converted to a TAB "
             "in the output SAM. The read group ID will be attached to every read in the output. "
             "An example is ’@RG\\tID:foo\\tSM:bar’. (Default=null) "
             "https://gatkforums.broadinstitute.org/gatk/discussion/6472/read-groups",
         ),
         ToolArgument(
             CpuSelector(),
             prefix="-t",
             position=2,
             shell_quote=False,
             doc="Number of threads. (default = 1)",
         ),
     ]
Esempio n. 27
0
 def outputs(self):
     """Declare the contamination and segmentation table outputs."""
     contamination = ToolOutput(
         "contOut",
         File(),
         glob=InputSelector("contaminationFileOut"),
         doc="contamination Table",
     )
     segmentation = ToolOutput(
         "segOut",
         File(),
         glob=InputSelector("segmentationFileOut"),
         doc="segmentation based on baf",
     )
     return [contamination, segmentation]
Esempio n. 28
0
 def test_string_formatter_optional_inpselect_no_default(self):
     """Expect WDL translation to raise for a formatter over an optional input."""
     # will throw
     tool_inputs = {"ti": ToolInput("ti", String(optional=True))}
     formatter = StringFormatter("{place} michael", place=InputSelector("ti"))
     # Resolve the function first so only the call itself is under test.
     translate = wdl.get_input_value_from_potential_selector_or_generator
     with self.assertRaises(Exception):
         translate(formatter, tool_inputs)
Esempio n. 29
0
 def test_input_selector_base_nostringenv(self):
     """Translate a selector with ``string_environment=False``; expect the bare name."""
     tool_inputs = {"random": ToolInput("random", String())}
     selector = InputSelector("random")
     translated = wdl.translate_input_selector(
         selector, tool_inputs, string_environment=False
     )
     self.assertEqual("random", translated)
Esempio n. 30
0
    def inputs(self):
        """Return this tool's inputs followed by the inherited and extra ones.

        Both generated filenames share one prefix: the first defined value of
        the optional ``outputPrefix`` input and the literal ``"generated"``.
        """
        # Would be good to include this in the prefix:
        #   If(InputSelector("bam").length().equals(1), InputSelector("bam")[0].basename(), None)

        prefix = FirstOperator([InputSelector("outputPrefix"), "generated"])
        return [
            ToolInput(
                "bam",
                Array(Bam),
                prefix="-I",
                position=10,
                # secondaries_present_as={".bai": "^.bai"},
                doc=
                "One or more input SAM or BAM files to analyze. Must be coordinate sorted.",
            ),
            ToolInput("outputPrefix", String(optional=True)),
            ToolInput(
                "outputFilename",
                Filename(prefix=prefix, suffix=".markduped", extension=".bam"),
                position=10,
                prefix="-O",
                # The -O and -M descriptions were swapped; -O names the
                # ".markduped.bam" file holding the marked records.
                doc="The output file to write marked records to.",
            ),
            ToolInput(
                "metricsFilename",
                Filename(prefix=prefix, suffix=".metrics", extension=".txt"),
                position=10,
                prefix="-M",
                doc="File to write duplication metrics to",
            ),
            *super().inputs(),
            *self.additional_args,
        ]