Esempio n. 1
0
    def test_dot_4(self):
        w = WorkflowBuilder("sbmf")
        w.input("inp", Array(str))
        w.input("inp2", Array(str))
        w.input("inp3", Array(str))
        w.input("inp4", Array(str))

        step = w.step(
            "dotTool",
            SingleTestTool(inputs=w.inp, input2=w.inp2, input3=w.inp3, input4=w.inp4),
            scatter=ScatterDescription(
                fields=["inputs", "input2", "input3", "input4"],
                method=ScatterMethods.dot,
            ),
        )

        outp = wdl.translate_step_node(
            step, "A.SingleTestTool", {}, {"inp", "inp2", "inp3", "inp4"}
        )
        expected = """\
scatter (Q in zip(inp, zip(inp2, zip(inp3, inp4)))) {
   call A.SingleTestTool as dotTool {
    input:
      inputs=Q.left,
      input2=Q.right.left,
      input3=Q.right.right.left,
      input4=Q.right.right.right
  }
}"""
        self.assertEqual(expected, outp.get_string(indent=0))
Esempio n. 2
0
 def inputs(self):
     """Declare the inputs: two localised file arrays and a ``.tar`` output name."""
     required_files = ToolInput(
         "files", Array(File()), position=2, localise_file=True
     )
     optional_files = ToolInput(
         "files2", Array(File(), optional=True), position=3, localise_file=True
     )
     tar_name = ToolInput("outputFilename", Filename(extension=".tar"), position=1)
     return [required_files, optional_files, tar_name]
 def inputs(self):
     """Return ``StarAlignerBase.additional_inputs`` followed by the options
     declared here, in declaration order."""
     declared = [*StarAlignerBase.additional_inputs]
     add = declared.append

     add(ToolInput("help", Boolean(optional=True), prefix="--help", doc="help page"))
     add(
         ToolInput(
             "runThreadN",
             Int(optional=True),
             default=CpuSelector(),
             prefix="--runThreadN",
             doc="int: number of threads to run STAR. Default: 1.",
         )
     )
     add(
         ToolInput(
             "genomeDir",
             Directory(optional=True),
             prefix="--genomeDir",
             doc="string: path to the directory where genome files are stored (for –runMode alignReads) or will be generated (for –runMode generateGenome). Default: ./GenomeDir",
         )
     )
     # Multiple read files are joined with "," behind a single prefix.
     add(
         ToolInput(
             "readFilesIn",
             Array(FastqGz, optional=True),
             prefix="--readFilesIn",
             separator=",",
             doc="string(s): paths to files that contain input read1 (and, if needed, read2). Default: Read1,Read2.",
         )
     )
     add(
         ToolInput(
             "outFileNamePrefix",
             Filename(),
             prefix="--outFileNamePrefix",
             doc="string: output files name prefix (including full or relative path). Can only be defined on the command line.",
         )
     )
     # Space-separated words; the prefix is not repeated per element.
     add(
         ToolInput(
             "outSAMtype",
             Array(String(), optional=True),
             prefix="--outSAMtype",
             separator=" ",
             prefix_applies_to_all_elements=False,
             doc='strings: type of SAM/BAM output. 1st word: "BAM": outputBAMwithoutsorting, "SAM": outputSAMwithoutsorting, "None": no SAM/BAM output. 2nd,3rd: "Unsorted": standard unsorted. "SortedByCoordinate": sorted by coordinate. This option will allocate extra memory for sorting which can be specified by –limitBAMsortRAM.',
         )
     )
     add(
         ToolInput(
             "outSAMunmapped",
             String(optional=True),
             prefix="--outSAMunmapped",
             doc="string(s): output of unmapped reads in the SAM format",
         )
     )
     add(
         ToolInput(
             "outSAMattributes",
             String(optional=True),
             prefix="--outSAMattributes",
             doc="string: a string of desired SAM attributes, in the order desired for the output SAM",
         )
     )
     add(
         ToolInput(
             "readFilesCommand",
             String(optional=True),
             prefix="--readFilesCommand",
             doc="string(s): command line to execute for each of the input file. This command should generate FASTA or FASTQ text and send it to stdout",
         )
     )
     return declared
Esempio n. 4
0
    def test_add_scatter_nested_arrays_incompatible(self):
        """Scattering Array(Array(int)) on ``inputs`` gives an incompatible edge."""
        wf = WorkflowBuilder("scatterededge")
        wf.input("inp", Array(Array(int)))
        scattered_step = wf.step(
            "stp", ArrayTestTool(inputs=wf.inp), scatter="inputs"
        )

        input_sources = wf.stp.sources["inputs"]
        edge = first_value(input_sources.source_map)

        self.assertFalse(edge.compatible_types)
        self.assertListEqual(["inputs"], scattered_step.scatter.fields)
Esempio n. 5
0
    def test_add_scatter_nested_arrays(self):
        """Scattering Array(Array(str)) on ``inps`` gives a type-compatible edge."""
        wf = WorkflowBuilder("scatterededge")
        wf.input("inp", Array(Array(str)))
        scattered_step = wf.step("stp", ArrayTestTool(inps=wf.inp), scatter="inps")

        input_sources = wf.stp.sources["inps"]
        edge = input_sources.source_map[0]

        self.assertTrue(edge.compatible_types)
        self.assertListEqual(["inps"], scattered_step.scatter.fields)
Esempio n. 6
0
 def test_array_of_array_of_strings(self):
     """A doubly nested string array saves as nested CWL ``array`` types."""
     cwl = Array(Array(String())).cwl_type()
     inner = {"type": "array", "items": "string"}
     expected = {"type": "array", "items": inner}
     self.assertDictEqual(expected, cwl.save())
Esempio n. 7
0
 def outputs(self) -> List[ToolOutput]:
     """Declare two globbed outputs: ``*.zip`` archives and the
     ``*/fastqc_data.txt`` data files."""
     zip_archives = ToolOutput(
         "out", Array(ZipFile()), glob=WildcardSelector(wildcard="*.zip")
     )
     data_files = ToolOutput(
         "datafile",
         Array(File),
         glob=WildcardSelector(wildcard="*/fastqc_data.txt"),
     )
     return [zip_archives, data_files]
Esempio n. 8
0
 def inputs(self) -> List[ToolInput]:
     """Declare the tool inputs: output filename, input VCFs, type, columns,
     sample ids and caller priority.

     NOTE: a former ``regions`` input (``--regions``) is deprecated and is
     deliberately not declared.
     """
     output_name = ToolInput(
         "outputFilename",
         Filename(extension=".vcf", suffix=".combined"),
         prefix="-o",
     )
     # "-i" is repeated in front of every VCF in the array.
     input_vcfs = ToolInput(
         "vcfs",
         Array(Vcf()),
         prefix="-i",
         prefix_applies_to_all_elements=True,
         doc="input vcfs, the priority of the vcfs will be based on the order of the input",
     )
     vcf_type = ToolInput(
         "type", String(), prefix="--type", doc="germline | somatic"
     )
     kept_columns = ToolInput(
         "columns",
         Array(String(), optional=True),
         prefix="--columns",
         separator=",",
         doc="Columns to keep, seperated by space output vcf (unsorted)",
     )
     normal_id = ToolInput(
         "normal",
         String(optional=True),
         prefix="--normal",
         doc="Sample id of germline vcf, or normal sample id of somatic vcf",
     )
     tumor_id = ToolInput(
         "tumor",
         String(optional=True),
         prefix="--tumor",
         doc="tumor sample ID, required if inputs are somatic vcfs",
     )
     caller_priority = ToolInput(
         "priority",
         Int(optional=True),
         prefix="--priority",
         doc="The priority of the callers, must match with the callers in the source header",
     )
     return [
         output_name,
         input_vcfs,
         vcf_type,
         kept_columns,
         normal_id,
         tumor_id,
         caller_priority,
     ]
Esempio n. 9
0
 def inputs(self) -> List[ToolInput]:
     """Declare header VCFs, content VCFs and the output filename.

     The output filename is emitted last behind an unquoted ``>`` prefix.
     """
     header_vcfs = ToolInput("headerVcfs", Array(VcfTabix), position=1)
     content_vcfs = ToolInput("contentVcfs", Array(VcfTabix), position=4)
     redirected_output = ToolInput(
         "outputFilename",
         Filename(extension=".vcf", suffix=".strelka"),
         prefix=">",
         position=6,
         shell_quote=False,
     )
     return [header_vcfs, content_vcfs, redirected_output]
Esempio n. 10
0
 def test_array_of_array_of_strings(self):
     """A doubly nested string array yields nested ``array`` dicts via get_dict()."""
     cwl = Array(Array(String())).cwl_type()
     actual = cwl.get_dict()
     inner = {"type": "array", "items": "string"}
     self.assertEqual(actual, {"type": "array", "items": inner})
Esempio n. 11
0
    def constructor(self):
        """Wire the workflow: cutadapt -> bwamempostalt -> sortsam, then expose ``out``."""
        # --- Workflow inputs ---
        self.input("sample_name", str)
        self.input("reference", FastaWithDict)
        self.input("reference_alt", File)
        self.input("fastq", FastqGzPair)

        # Optional adapter lists forwarded to cutadapt
        self.input("cutadapt_adapter", Array(str, optional=True))
        self.input("cutadapt_removeMiddle3Adapter", Array(str, optional=True))

        # --- Steps ---
        trim_tool = CutAdapt_2_4(
            fastq=self.fastq,
            adapter=self.cutadapt_adapter,
            front=None,
            removeMiddle5Adapter=None,
            removeMiddle3Adapter=self.cutadapt_removeMiddle3Adapter,
            qualityCutoff=15,
            minimumLength=50,
            outputPrefix=self.sample_name,
        )
        self.step("cutadapt", trim_tool)

        # Alignment consumes the reads produced by the cutadapt step.
        align_tool = BwaMem_PostAlt_SamToolsView(
            reads=self.cutadapt.out,
            sampleName=self.sample_name,
            reference=self.reference,
            markShorterSplits=True,
            reference_alt=self.reference_alt,
        )
        self.step("bwamempostalt", align_tool)

        sort_tool = Gatk4SortSam_4_1_2(
            bam=self.bwamempostalt.out,
            sortOrder="coordinate",
            createIndex=True,
            validationStringency="SILENT",
            maxRecordsInRam=5000000,
            tmpDir=".",
        )
        self.step("sortsam", sort_tool)

        # --- Workflow output ---
        self.output("out", source=self.sortsam)
Esempio n. 12
0
 def outputs(self):
     """Declare three globbed outputs: unaligned reads, ``Stats/*`` and ``InterOp/*``."""
     unaligned_reads = ToolOutput(
         "unalignedReads",
         output_type=Array(FastqGz()),
         glob=WildcardSelector("*/*.fastq.gz"),
     )
     stats_files = ToolOutput(
         "stats", output_type=Array(File()), glob=WildcardSelector("Stats/*")
     )
     interop_files = ToolOutput(
         "interop", output_type=Array(File()), glob=WildcardSelector("InterOp/*")
     )
     return [unaligned_reads, stats_files, interop_files]
Esempio n. 13
0
    def add_inputs(self):
        """Register the normal/tumor read and name inputs, then the reference,
        interval and configuration inputs via their dedicated helpers."""
        # Paired FASTQ arrays, documented from INPUT_DOCS under the same key.
        for reads_key in ("normal_inputs", "tumor_inputs"):
            self.input(reads_key, Array(FastqGzPair), doc=INPUT_DOCS[reads_key])

        for name_key in ("normal_name", "tumor_name"):
            self.input(name_key, String(), doc=INPUT_DOCS[name_key])

        self.add_inputs_for_reference()
        self.add_inputs_for_intervals()
        self.add_inputs_for_configuration()
Esempio n. 14
0
    def inputs(self) -> List[ToolInput]:
        """Return the tool inputs.

        The list holds the paired FASTQ input, the optional 3' adapter list,
        the two generated output filenames (``-R1`` / ``-R2``) and finally
        everything in ``self.additional_args``.
        """
        # The previous version imported ``uuid`` and generated a UUID on every
        # call that was never used; that dead code has been removed.
        return [
            ToolInput("fastq", FastqGzPair, position=5),
            ToolInput(
                "adapter",
                input_type=Array(String(), optional=True),
                prefix="-a",
                # "-a" is repeated in front of every adapter in the array.
                prefix_applies_to_all_elements=True,
                doc=
                "Sequence of an adapter ligated to the 3' end (paired data: of the first read). "
                "The adapter and subsequent bases are trimmed. If a '$' character is appended ('anchoring'), "
                "the adapter is only found if it is a suffix of the read.",
            ),
            ToolInput(
                "outputFilename",
                Filename(suffix="-R1", extension=".fastq.gz"),
                prefix="-o",
                doc=
                "Write trimmed reads to FILE. FASTQ or FASTA format is chosen depending on input. "
                "The summary report is sent to standard output. Use '{name}' in FILE to demultiplex "
                "reads into multiple files. Default: write to standard output",
            ),
            ToolInput(
                "secondReadFile",
                Filename(suffix="-R2", extension=".fastq.gz"),
                prefix="-p",
                doc="Write second read in a pair to FILE.",
            ),
            *self.additional_args,
        ]
Esempio n. 15
0
 def inputs(self):
     """Return the inherited inputs, then the BAMs to merge, the optional
     sample name, the merged output filename and ``self.additional_args``."""
     return [
         *super().inputs(),
         ToolInput(
             "bams",
             Array(BamBai()),
             prefix="-I",
             # "-I" is repeated in front of every BAM in the array.
             prefix_applies_to_all_elements=True,
             # Previously read "The SAM/BAM file to sort.", which contradicted
             # the merged output declared below.
             doc="SAM or BAM input file",
             position=10,
         ),
         ToolInput(
             "sampleName",
             String(optional=True),
             doc="Used for naming purposes only",
         ),
         ToolInput(
             "outputFilename",
             # The generated name is prefixed with the ``sampleName`` input.
             Filename(
                 prefix=InputSelector("sampleName"),
                 suffix=".merged",
                 extension=".bam",
             ),
             position=10,
             prefix="-O",
             doc="SAM/BAM file to write merged result to",
         ),
         *self.additional_args,
     ]
Esempio n. 16
0
    def test_add_non_scatter2(self):
        """A step added without ``scatter=`` produces a non-scattered edge."""
        wf = WorkflowBuilder("scatterededge")
        wf.input("inp", Array(String()))
        wf.step("stp", ArrayTestTool(inputs=wf.inp))

        input_sources = wf.stp.sources["inputs"]
        edge = first_value(input_sources.source_map)
        self.assertFalse(edge.scatter)
Esempio n. 17
0
 def inputs(self):
     """Return inherited inputs, the shared ``additional_args``, then the
     inputs declared here (bam, sites, intervals, pileupTableOut)."""
     collected = [*super().inputs()]
     collected.extend(Gatk4GetPileUpSummariesBase.additional_args)

     # "-I" is repeated in front of every BAM in the array.
     collected.append(
         ToolInput(
             "bam",
             Array(BamBai()),
             prefix="-I",
             prefix_applies_to_all_elements=True,
             doc="The SAM/BAM/CRAM file containing reads.",
             position=0,
         )
     )
     collected.append(
         ToolInput(
             "sites",
             VcfTabix(),
             prefix="-V",
             doc="sites of common biallelic variants",
         )
     )
     collected.append(
         ToolInput(
             "intervals",
             VcfTabix(optional=True),
             prefix="--intervals",
             doc="-L (BASE) One or more genomic intervals over which to operate",
         )
     )
     collected.append(
         ToolInput(
             "pileupTableOut", Filename(extension=".txt"), position=1, prefix="-O"
         )
     )
     return collected
Esempio n. 18
0
 def inputs(self):
     """Declare two positional file arrays and an optional output directory name."""
     first_files = ToolInput("inp_files", Array(File), position=4)
     second_files = ToolInput("inp_files2", Array(File), position=5)
     # Falls back to the literal "output_dir" when no value is supplied.
     target_dir = ToolInput(
         "output_dir", String(optional=True), default="output_dir", position=8
     )
     return [first_files, second_files, target_dir]
Esempio n. 19
0
 def inputs(self):
     """Return ``self.additional_inputs`` followed by the BAM list, the output
     filename and the annotation file."""
     collected = [*self.additional_inputs]
     collected += [
         ToolInput(
             "bam",
             Array(Bam),
             position=10,
             doc="A list of SAM or BAM format files. They can be either name or location sorted. If no files provided, <stdin> input is expected. Location-sorted paired-end reads are automatically sorted by read names.",
         ),
         ToolInput(
             "outputFilename",
             Filename(extension=".txt"),
             prefix="-o",
             doc="Name of output file including read counts. A separate file including summary statistics of counting results is also included in the output ('<string>.summary'). Both files are in tab delimited format.",
         ),
         ToolInput(
             "annotationFile",
             File,
             prefix="-a",
             doc="Name of an annotation file. GTF/GFF format by default. See -F option for more format information. Inbuilt annotations (SAF format) is available in 'annotation' directory of the package. Gzipped file is also accepted.",
         ),
     ]
     return collected
Esempio n. 20
0
    def constructor(self):
        """Scatter the haplotype caller over ``intervals`` and merge the resulting VCFs."""
        self.input("input_bam", BamBai)
        self.input("ref_fasta", FastaWithIndexes)
        self.input("intervals", Array(Bed))

        # NOTE: "AS_StandardAnnotation" is intentionally left out of the groups.
        annotation_groups = ["StandardAnnotation", "StandardHCAnnotation"]
        caller = Gatk4HaplotypeCaller_4_1_4(
            inputRead=self.input_bam,
            reference=self.ref_fasta,
            intervals=self.intervals,
            gvcfGqBands=[10, 20, 30, 40, 50, 60, 70, 80, 90],
            contaminationFractionToFilter=0.0,
            annotationGroup=annotation_groups,
        )
        # One caller invocation per element of the "intervals" input.
        self.step("haplotype_caller", caller, scatter="intervals")

        merger = Gatk4MergeVcfs_4_1_4(vcfs=self.haplotype_caller.out)
        self.step("merge", merger)

        self.output("output_vcf", source=self.merge.output_vcf)
Esempio n. 21
0
 def inputs(self):
     """Return inherited inputs, ``SamToolsViewBase.additional_inputs``, then the
     sam, reference, outputFilename and regions inputs."""
     collected = [*super(SamToolsViewBase, self).inputs()]
     collected.extend(SamToolsViewBase.additional_inputs)

     # The alignment file may be any of SAM, BAM or CRAM.
     alignment = ToolInput("sam", UnionType(Sam(), Bam(), Cram()), position=10)
     reference = ToolInput(
         "reference",
         FastaWithDict(optional=True),
         position=6,
         prefix="-T",
         doc="A FASTA format reference FILE, optionally compressed by bgzip and ideally indexed "
         "by samtools faidx. If an index is not present, one will be generated for you.",
     )
     # Output name is derived from the "sam" input with its extension removed.
     output_name = ToolInput(
         "outputFilename",
         Filename(
             prefix=InputSelector("sam", remove_file_extension=True),
             extension=".bam",
         ),
         position=5,
         prefix="-o",
         doc="Output to FILE [stdout].",
     )
     regions = ToolInput(
         "regions",
         Array(String, optional=True),
         position=11,
         doc="Region specifications after the input filename to restrict output to only those alignments which "
         "overlap the specified region(s). Use of region specifications requires a coordinate-sorted and "
         "indexed input file (in BAM or CRAM format)",
     )

     collected += [alignment, reference, output_name, regions]
     return collected
Esempio n. 22
0
 def inputs(self):
     """Declare the inputs: BAMs, reference, the two generated output filenames,
     thread count, optional blacklist and the working directory."""
     bams = ToolInput("bams", Array(Bam()), position=10)
     reference = ToolInput(
         "reference", FastaWithDict(), position=1, prefix="--reference"
     )
     variants_name = ToolInput(
         "outputFilename",
         Filename(suffix=".svs", extension=".vcf"),
         position=2,
         prefix="--output",
     )
     assembly_name = ToolInput(
         "assemblyFilename",
         Filename(suffix=".assembled", extension=".bam"),
         position=3,
         prefix="--assembly",
     )
     # Thread count defaults to a CpuSelector.
     threads = ToolInput(
         "threads", Int(optional=True), default=CpuSelector(), prefix="--threads"
     )
     blacklist = ToolInput(
         "blacklist", Bed(optional=True), position=4, prefix="--blacklist"
     )
     working_dir = ToolInput(
         "tmpdir", String(optional=True), default="./TMP", prefix="--workingdir"
     )
     return [
         bams,
         reference,
         variants_name,
         assembly_name,
         threads,
         blacklist,
         working_dir,
     ]
Esempio n. 23
0
 def tests(self):
     """Return the "basic" and "minimal" test cases, which share the same
     remote read pair and a single thread."""
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def fresh_input():
         # A new dict/list per test case so the cases never share objects.
         return {
             "reads": [
                 f"{remote_dir}/NA12878-BRCA1_R1.fastq.gz",
                 f"{remote_dir}/NA12878-BRCA1_R2.fastq.gz",
             ],
             "threads": 1,
         }

     basic_case = TTestCase(
         name="basic",
         input=fresh_input(),
         output=FastqGzPair.basic_test("out", 824000, 408000, 416000)
         + Array.array_wrapper([TextFile.basic_test("datafile", 81000)]),
     )
     minimal_case = TTestCase(
         name="minimal",
         input=fresh_input(),
         output=self.minimal_test(),
     )
     return [basic_case, minimal_case]
Esempio n. 24
0
 def inputs(self):
     """Return inherited inputs, then bam, reference, outputPrefix and intervals,
     and finally ``self.additional_args``."""
     collected = [*super(Gatk4DepthOfCoverageBase, self).inputs()]

     collected.append(
         ToolInput(
             "bam",
             BamBai(),
             prefix="-I",
             doc="The SAM/BAM/CRAM file containing reads.",
             secondaries_present_as={".bai": "^.bai"},
         )
     )
     collected.append(
         ToolInput("reference", FastaWithDict(), prefix="-R", doc="Reference sequence")
     )
     collected.append(
         ToolInput(
             "outputPrefix",
             String(),
             prefix="-O",
             doc="An output file created by the walker. Will overwrite contents if file exists",
         )
     )
     # "--intervals" is repeated in front of every BED file in the array.
     collected.append(
         ToolInput(
             "intervals",
             Array(Bed),
             prefix="--intervals",
             doc="-L (BASE) One or more genomic intervals over which to operate",
             prefix_applies_to_all_elements=True,
         )
     )

     collected.extend(self.additional_args)
     return collected
Esempio n. 25
0
    def constructor(self):
        """Scatter FastQC over ``reads``, forwarding a fixed set of its options
        as workflow inputs, and capture every output of the step."""
        self.input("reads", Array(FastqGz))

        # Ids of the FastQC inputs that are re-exposed as workflow inputs.
        forwarded_ids = {
            "casva",
            "nano",
            "nofilter",
            "noextract",
            "nogroup",
            "format",
            "contaminants",
            "adapters",
            "limits",
            "kmers",
        }

        # Index the tool's inputs by id (a later duplicate id replaces an earlier one).
        inputs_by_id = {}
        for tool_input in fastqc_single_instantiated.inputs():
            inputs_by_id[tool_input.id()] = tool_input

        # Create a workflow input for each forwarded option, keyed by its id.
        forwarded = {}
        for tool_input in inputs_by_id.values():
            if tool_input.id() not in forwarded_ids:
                continue
            key = tool_input.id()
            forwarded[key] = self.input(
                tool_input.id(), tool_input.input_type, doc=tool_input.doc
            )

        fastqc_tool = FastQCSingleLatest(read=self.reads, **forwarded)
        self.step("fastqc", fastqc_tool, scatter="read")

        self.capture_outputs_from_step(self.fastqc)
Esempio n. 26
0
 def inputs(self):
     """Declare the unmapped BAM, aligned BAMs, optional reference and output
     filename, followed by ``self.additional_args``."""
     unmapped = ToolInput(
         "ubam",
         BamBai(),
         prefix="--UNMAPPED_BAM",
         prefix_applies_to_all_elements=True,
         doc="Original SAM or BAM file of unmapped reads, which must be in queryname order.",
         position=10,
     )
     aligned = ToolInput(
         "bam",
         Array(BamBai()),
         prefix="--ALIGNED_BAM",
         prefix_applies_to_all_elements=True,
         doc="SAM or BAM file(s) with alignment data.",
         position=10,
     )
     reference = ToolInput(
         "reference",
         FastaWithDict(optional=True),
         prefix="--REFERENCE_SEQUENCE",
         position=10,
         doc="Reference sequence file.",
     )
     merged_name = ToolInput(
         "outputFilename",
         Filename(extension=".bam"),
         position=10,
         prefix="--OUTPUT",
         doc="Merged SAM or BAM file to write to.",
     )

     collected = [unmapped, aligned, reference, merged_name]
     collected.extend(self.additional_args)
     return collected
Esempio n. 27
0
    def inputs(self):
        """Return the inputs declared here, then the inherited inputs and
        ``self.additional_args``.

        Both generated filenames share one prefix: the ``outputPrefix`` input
        when supplied, otherwise the literal ``"generated"``.
        """
        # Would be good to include this in the prefix:
        #   If(InputSelector("bam").length().equals(1), InputSelector("bam")[0].basename(), None)

        prefix = FirstOperator([InputSelector("outputPrefix"), "generated"])
        return [
            ToolInput(
                "bam",
                Array(Bam),
                prefix="-I",
                position=10,
                # secondaries_present_as={".bai": "^.bai"},
                doc=
                "One or more input SAM or BAM files to analyze. Must be coordinate sorted.",
            ),
            ToolInput("outputPrefix", String(optional=True)),
            # The -O and -M doc strings were previously swapped: -O names the
            # ".markduped.bam" records file, -M the ".metrics.txt" metrics file.
            ToolInput(
                "outputFilename",
                Filename(prefix=prefix, suffix=".markduped", extension=".bam"),
                position=10,
                prefix="-O",
                doc="The output file to write marked records to.",
            ),
            ToolInput(
                "metricsFilename",
                Filename(prefix=prefix, suffix=".metrics", extension=".txt"),
                position=10,
                prefix="-M",
                doc="File to write duplication metrics to",
            ),
            *super().inputs(),
            *self.additional_args,
        ]
Esempio n. 28
0
    def constructor(self):
        """Merge the input BAMs, mark duplicates on the merged result and expose it as ``out``."""
        self.input("bams", Array(BamBai()))
        self.input("createIndex", Boolean, default=True)
        self.input("maxRecordsInRam", Int, default=5000000)
        self.input("sampleName", String(optional=True))

        merge_tool = Gatk4MergeSamFiles_4_1_2(
            bams=self.bams,
            useThreading=True,
            createIndex=self.createIndex,
            maxRecordsInRam=self.maxRecordsInRam,
            validationStringency="SILENT",
            sampleName=self.sampleName,
        )
        self.step("mergeSamFiles", merge_tool)

        # Duplicate marking reuses the same createIndex / maxRecordsInRam inputs.
        dedup_tool = Gatk4MarkDuplicates_4_1_2(
            bam=self.mergeSamFiles.out,
            createIndex=self.createIndex,
            maxRecordsInRam=self.maxRecordsInRam,
        )
        self.step("markDuplicates", dedup_tool)

        self.output("out", source=self.markDuplicates.out)
Esempio n. 29
0
 def inputs(self):
     """Return inherited inputs, the shared ``additional_args``, then the
     counts files, EM iteration count and model output filename."""
     collected = [*super(Gatk4LearnReadOrientationModelBase, self).inputs()]
     collected.extend(Gatk4LearnReadOrientationModelBase.additional_args)

     # "-I" is repeated in front of every counts archive in the array.
     counts_files = ToolInput(
         "f1r2CountsFiles",
         Array(TarFileGz),
         position=0,
         prefix="-I",
         prefix_applies_to_all_elements=True,
         doc="Counts for the read orientation of fragments",
     )
     em_iterations = ToolInput(
         "numEmIterations",
         Int(optional=True),
         position=1,
         prefix="--num-em-iterations",
         default=30,  # Sebastian thinks this is best
         doc="Amount of iterations for the em process before it bails",
     )
     model_output = ToolInput(
         "modelFileOut", Filename(extension=".tar.gz"), position=3, prefix="-O"
     )

     collected += [counts_files, em_iterations, model_output]
     return collected
Esempio n. 30
0
 def inputs(self):
     """Return the inputs declared here (bam, outputFilename, metricsFilename),
     then the inherited inputs and ``self.additional_args``."""
     return [
         ToolInput(
             "bam",
             Array(Bam),
             prefix="-I",
             position=10,
             # secondaries_present_as={".bai": "^.bai"},
             doc=
             "One or more input SAM or BAM files to analyze. Must be coordinate sorted.",
         ),
         # The -O and -M doc strings were previously swapped: -O names the
         # ".markduped.bam" records file, -M the ".metrics.txt" metrics file.
         ToolInput(
             "outputFilename",
             Filename(
                 prefix="generated",
                 suffix=".markduped",
                 extension=".bam",
             ),
             position=10,
             prefix="-O",
             doc="The output file to write marked records to.",
         ),
         ToolInput(
             "metricsFilename",
             Filename(extension=".metrics.txt"),
             position=10,
             prefix="-M",
             doc="File to write duplication metrics to",
         ),
         *super().inputs(),
         *self.additional_args,
     ]