Example #1
    def test_workflow_string_not_null(self):
        w = WorkflowBuilder("wf")
        w.input("inp", Optional[str])
        w.output("out", source=w.inp.assert_not_null())

        cwltool = w.translate("cwl", allow_empty_container=True, to_console=False)[0]
        print(cwltool)
Example #2
    def process_subpipeline(**connections):
        w = WorkflowBuilder("somatic_subpipeline")

        w.input("bam", BamBai)
        w.input("intervals", Bed)
        w.input("reference", FastaWithDict)
        w.input("known_sites", Array(VcfTabix))

        w.step(
            "base_recalibrator",
            gatk4.Gatk4BaseRecalibratorLatest(
                bam=w.bam,
                intervals=w.intervals,
                reference=w.reference,
                knownSites=w.known_sites,
            ),
        )

        w.step(
            "apply_bqsr",
            gatk4.Gatk4ApplyBqsrLatest(
                bam=w.bam,
                recalFile=w.base_recalibrator.out,
                intervals=w.intervals,
                reference=w.reference,
            ),
        )

        w.output("out", source=w.apply_bqsr.out)

        return w(**connections)
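For context, a factory like this plugs into a parent workflow the same way the subworkflow in Example #10 does. A minimal sketch (the parent workflow and its wiring below are hypothetical, reusing the input types from the example above):

parent = WorkflowBuilder("somatic_pipeline")
parent.input("bam", BamBai)
parent.input("intervals", Bed)
parent.input("reference", FastaWithDict)
parent.input("known_sites", Array(VcfTabix))

# the factory returns an instantiated subworkflow, ready to be used as a step
parent.step(
    "bqsr",
    process_subpipeline(
        bam=parent.bam,
        intervals=parent.intervals,
        reference=parent.reference,
        known_sites=parent.known_sites,
    ),
)
parent.output("out", source=parent.bqsr.out)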
Example #3
    def test_string_formatter(self):
        wf = WorkflowBuilder("wf")
        wf.input("sampleName", str)
        wf.input("platform", str)

        wf.input(
            "readGroupHeaderLine",
            String(optional=True),
            default=StringFormatter(
                "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
                name=InputSelector("sampleName"),
                pl=InputSelector("platform"),
            ),
        )
        wf.step("print", EchoTestTool(inp=wf.readGroupHeaderLine))
        wf.output("out", source=wf.print)
        d, _ = cwl.CwlTranslator.translate_workflow(
            wf, with_container=False, allow_empty_container=True
        )
        stepinputs = d.save()["steps"][0]["in"]
        self.assertEqual(4, len(stepinputs))
        expression = stepinputs[-1]["valueFrom"]
        expected = (
            "$((inputs._print_inp_readGroupHeaderLine != null) "
            "? inputs._print_inp_readGroupHeaderLine "
            ': "@RG\\\\tID:{name}\\\\tSM:{name}\\\\tLB:{name}\\\\tPL:{pl}".replace(/\\{name\\}/g, inputs._print_inp_sampleName).replace(/\\{pl\\}/g, inputs._print_inp_platform))'
        )
        self.assertEqual(expected, expression)
Example #4
 def test_add_output(self):
     w = WorkflowBuilder("test_add_input")
     w.step("stp", SingleTestTool(), ignore_missing=True)
     w.output("outputStep", str, source=w.stp)
     self.assertEqual(len(w.output_nodes), 1)
     self.assertEqual(w.outputStep, next(iter(w.output_nodes.values())))
     self.assertIsNotNone(w.nodes["stp"])
Example #5
 def test_alias_selector(self):
     w = WorkflowBuilder("wf")
     w.input("inp", str)
     w.step("echo", EchoTestTool(inp=w.inp.as_type(str)))
     w.output("out", source=w.echo.out)
     sn: List[cwlgen.WorkflowStep] = cwl.translate_step_node(
         w.step_nodes["echo"], inputs_dict={"inp": ToolInput("inp", str)}
     )
     self.assertEqual("inp", sn[0].in_[0].source)
    def test_output_name_and_folder(self):
        w = WorkflowBuilder("wf")
        w.input("inp", str)
        w.step("print", Echo(inp=w.inp))
        w.output("out", source=w.print, output_name=w.inp, output_folder=[w.inp])

        inputs = {"inp": ["test1", "test2"]}
        modifier = BatchPipelineModifier(BatchRunRequirements(["inp"], "inp"))
        new_workflow = modifier.tool_modifier(w, inputs, {})
        print(new_workflow)
Example #7
    def process_subpipeline(**connections):
        w = WorkflowBuilder("split_bam_subpipeline")

        w.input("bam", BamBai)
        w.input("intervals", Bed(optional=True))
        w.step("split_bam",
               gatk4.Gatk4SplitReads_4_1_3(bam=w.bam, intervals=w.intervals))
        w.output("out", source=w.split_bam.out)

        return w(**connections)
Example #8
    def test_read_contents(self):
        w = WorkflowBuilder("wf")
        w.input("inp", str)
        w.step("stp", EchoTestTool(inp=w.inp))
        w.output("out", source=w.stp.out.contents())

        w_cwl = cwl.CwlTranslator().translate_workflow(w, with_container=False)[0]

        self.assertEqual(2, len(w_cwl.steps))
        self.assertEqual(
            "${return {out: inputs._stpout.contents }}", w_cwl.steps[1].run.expression
        )
        self.assertTrue(w_cwl.steps[1].run.inputs[0].loadContents)
Example #9
    def test_expression_default(self):

        wf = WorkflowBuilder("test_expression_defaults")
        wf.input("inp", Optional[str])

        wf.step(
            "echo",
            EchoTestTool(inp="Hello, " +
                         If(IsDefined(wf.inp), wf.inp, ", Michael!")),
        )

        wf.output("out", source=wf.echo)

        wf.translate("cwl")
Example #10
    def test_subworkflow(self):
        w = WorkflowBuilder("test_subworkflow")

        sub_w = WorkflowBuilder("subworkflow")
        sub_w.input("sub_inp", str)
        sub_w.step("sub_stp", SingleTestTool(inputs=sub_w.sub_inp))
        sub_w.output("sub_out", source=sub_w.sub_stp.out)

        w.input("inp", str)
        w.step("stp_workflow", sub_w(sub_inp=w.inp))
        w.output("out", source=w.stp_workflow.sub_out)

        # would be good to come up with some tests
        # w.translate("wdl")
        self.assertTrue(True)
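A slightly stronger check could assert the wiring directly, without translating. A minimal sketch, using `step_nodes` and `output_nodes` the same way other examples in this collection do:

        # hypothetical assertions: the subworkflow step and the output should be registered
        self.assertIn("stp_workflow", w.step_nodes)
        self.assertIn("out", w.output_nodes)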
Example #11
    @classmethod
    def setUpClass(cls):

        w = WorkflowBuilder("test_operators")

        w.input("inp", Array(File()))
        inval = w.inp[0].basename()
        w.step("echo", SingleTestTool(input1=inval))
        w.output("out", source=w.echo)
        cls.wf = w

        w2 = WorkflowBuilder("test_scattered_operator_with_alias")

        w2.input("inp", Array(Array(String)))
        w2.step("echo", SingleTestTool(input1=w2.inp[0]), scatter="input1")
        w2.output("out", source=w2.echo)
        cls.wf2 = w2
Example #12
def recursively_build_workflow_with_layers(layers):
    w = WorkflowBuilder(f"scattered_with_{layers}")

    w.input("scatters", int, default=3)
    w.input("seed_hash", Optional[str])
    w.input("bias", Optional[int])

    w.step("generate_random_ints",
           GenerateIntegers(numbers_to_generate=w.scatters))

    w.step(
        "generate_hashes",
        CalculateMd5HashOfInt(value=w.generate_random_ints.out),
        scatter="value",
    )

    joined_generate_hashes = JoinOperator(w.generate_hashes.out, ",")

    if layers > 0:

        innerworkflow = recursively_build_workflow_with_layers(layers - 1)
        w.step(
            "inner",
            innerworkflow(scatters=w.generate_random_ints),
            scatter="scatters",
        )
        joined_inner = JoinOperator(w.inner.out_hash, ",")
        post_hash_inp = (j.logical.If(
            j.logical.IsDefined(w.seed_hash),
            w.seed_hash + joined_inner,
            joined_inner,
        ) + joined_generate_hashes)
    else:
        post_hash_inp = j.logical.If(
            j.logical.IsDefined(w.seed_hash),
            w.seed_hash + joined_generate_hashes,
            joined_generate_hashes,
        )

    w.step(
        "post_hash",
        CalculateMd5Hash(value=post_hash_inp),
    )

    w.output("out_hash", source=w.post_hash.out)

    return w
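A minimal usage sketch (assuming the builder function above and its tools are importable; the layer count is arbitrary):

w = recursively_build_workflow_with_layers(2)
w.translate("cwl", to_console=False)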
Example #13
    def process_subpipeline(**connections):
        w = WorkflowBuilder("somatic_subpipeline")

        w.input("reference", FastaWithDict)
        w.input("reads", Array(FastqGzPair))
        w.input("cutadapt_adapters", File(optional=True))

        w.input("sample_name", String)

        w.step("fastqc", FastQC_0_11_5(reads=w.reads), scatter="reads")

        w.step(
            "getfastqc_adapters",
            ParseFastqcAdaptors(
                fastqc_datafiles=w.fastqc.datafile,
                cutadapt_adaptors_lookup=w.cutadapt_adapters,
            ),
            scatter="fastqc_datafiles",
        )

        w.step(
            "align_and_sort",
            BwaAligner(
                fastq=w.reads,
                reference=w.reference,
                sample_name=w.sample_name,
                sortsam_tmpDir=".",
                cutadapt_adapter=w.getfastqc_adapters,
                cutadapt_removeMiddle3Adapter=w.getfastqc_adapters,
            ),
            scatter=[
                "fastq", "cutadapt_adapter", "cutadapt_removeMiddle3Adapter"
            ],
        )
        w.step(
            "merge_and_mark",
            MergeAndMarkBams_4_1_3(bams=w.align_and_sort.out,
                                   sampleName=w.sample_name),
        )

        w.output("out", source=w.merge_and_mark.out)
        w.output("reports",
                 source=w.fastqc.out,
                 output_folder=[w.sample_name, "reports"])

        return w(**connections)
Example #14
    def test_with_str_default(self):
        w = WorkflowBuilder("wf")
        w.input("inp", str, default="hello")
        w.output("out", source=w.inp)
        out, _, _ = w.translate("wdl", to_console=False)
        expected = """\
version development



workflow wf {
  input {
    String? inp
  }
  output {
    String out = select_first([inp, "hello"])
  }
}"""
        self.assertEqual(expected, out)
Example #15
    def test_with_int_default(self):
        w = WorkflowBuilder("wf")
        w.input("inp", int, default=0)
        w.output("out", source=w.inp)
        out, _, _ = w.translate("wdl", to_console=False)
        expected = """\
version development



workflow wf {
  input {
    Int? inp
  }
  output {
    Int out = select_first([inp, 0])
  }
}"""
        self.assertEqual(expected, out)
Example #16
 def process_subpipeline(**connections):
     w = WorkflowBuilder("samtools_mpileup_subpipeline")
     w.input("vcf", Vcf)
     w.input("bam", BamBai)
     w.input("reference", FastaWithDict)
     w.step(
         "samtools_mpileup",
         SamToolsMpileupLatest(
             bam=w.bam,
             positions=w.vcf,
             reference=w.reference,
             countOrphans=True,
             noBAQ=True,
             minBQ=0,
             maxDepth=10000,
         ),
     )
     w.output("out", source=w.samtools_mpileup.out)
     return w(**connections)
Example #17
    def test_filter_null(self):
        T = CommandToolBuilder(
            tool="testsingleinput",
            base_command="echo",
            inputs=[ToolInput("inp", str, position=0)],
            outputs=[ToolOutput("out", Stdout)],
            version="v1",
            container=None,
        )
        w = WorkflowBuilder("wf")
        w.input("inp", Array(Optional[str], optional=True))
        w.step("stp", T(inp=FilterNullOperator(w.inp)), scatter="inp")
        w.output("out", source=w.stp.out)

        w_cwl = cwl.CwlTranslator().translate_workflow(w, with_container=False)[0]
        self.assertEqual(2, len(w_cwl.steps))
        self.assertEqual(
            "_evaluate_prescatter-stp-inp/out", w_cwl.steps[1].in_[0].source
        )
Example #18
    def test_simple(self):
        w = WorkflowBuilder("wf")
        w.input("inp", str)
        w.output("out", source=w.inp)
        out, _, _ = w.translate("wdl", to_console=False)
        expected = """\
version development



workflow wf {
  input {
    String inp
  }
  output {
    String out = inp
  }
}"""
        self.assertEqual(expected, out)
Example #19
    def test_array_step_input(self):
        wf = WorkflowBuilder("cwl_test_array_step_input")
        wf.input("inp1", Optional[str])
        wf.input("inp2", Optional[str])

        wf.step(
            "print",
            ArrayTestTool(
                inps=[
                    If(IsDefined(wf.inp1), wf.inp1, "default1"),
                    If(IsDefined(wf.inp2), wf.inp2 + "_suffix", ""),
                ]
            ),
        )

        wf.output("out", source=wf.print)

        ret, _, _ = wf.translate("cwl", allow_empty_container=True, to_console=False)
        self.maxDiff = None
        self.assertEqual(cwl_arraystepinput, ret)
Example #20
    def test_basic(self):
        w = WorkflowBuilder("my_conditional_workflow")

        w.input("inp", String(optional=True))

        w.step(
            "print_if_has_value",
            TestTool(testtool=w.inp),
            # only print if the input "inp" is defined.
            when=IsDefined(w.inp),
        )

        w.output("out", source=w.print_if_has_value)

        inputs_dict = {"inp": ToolInput("inp", str)}

        c = cwl.translate_step_node(w.print_if_has_value, inputs_dict=inputs_dict)[0]

        self.assertEqual("$((inputs.__when_inp != null))", c.when)
        extra_input: cwlgen.WorkflowStepInput = c.in_[-1]
        self.assertEqual("__when_inp", extra_input.id)
Example #21
    def test_string_formatter_stepinput(self):
        wf = WorkflowBuilder("wf")
        wf.input("sampleName", str)
        wf.input("platform", str)

        wf.step(
            "print",
            EchoTestTool(
                inp=StringFormatter(
                    "@RG\\tID:{name}\\tSM:{name}\\tLB:{name}\\tPL:{pl}",
                    name=wf.sampleName,
                    pl=wf.platform,
                )
            ),
        )
        wf.output("out", source=wf.print)
        d, _ = cwl.CwlTranslator.translate_workflow(
            wf, with_container=False, allow_empty_container=True
        )
        stepinputs = d.save()["steps"][0]["in"]
        self.assertEqual(3, len(stepinputs))
        expression = stepinputs[-1]["valueFrom"]
        expected = '$("@RG\\\\tID:{name}\\\\tSM:{name}\\\\tLB:{name}\\\\tPL:{pl}".replace(/\\{name\\}/g, inputs._print_inp_sampleName).replace(/\\{pl\\}/g, inputs._print_inp_platform))'
        self.assertEqual(expected, expression)
Example #22
    def process_subpipeline(**connections):
        w = WorkflowBuilder("somatic_subpipeline")

        # INPUTS
        w.input("reads", Array(FastqGzPair))
        w.input("sample_name", String)
        w.input("reference", FastaWithDict)
        w.input("cutadapt_adapters", File(optional=True))
        w.input("gatk_intervals", Array(Bed))
        w.input("snps_dbsnp", VcfTabix)
        w.input("snps_1000gp", VcfTabix)
        w.input("known_indels", VcfTabix)
        w.input("mills_indels", VcfTabix)

        # STEPS
        w.step("fastqc", FastQC_0_11_8(reads=w.reads), scatter="reads")

        w.step(
            "getfastqc_adapters",
            ParseFastqcAdaptors(
                fastqc_datafiles=w.fastqc.datafile,
                cutadapt_adaptors_lookup=w.cutadapt_adapters,
            ),
            scatter="fastqc_datafiles",
        )

        w.step(
            "align_and_sort",
            BwaAligner(
                fastq=w.reads,
                reference=w.reference,
                sample_name=w.sample_name,
                sortsam_tmpDir=None,
                cutadapt_adapter=w.getfastqc_adapters,
                cutadapt_removeMiddle3Adapter=w.getfastqc_adapters,
            ),
            scatter=[
                "fastq", "cutadapt_adapter", "cutadapt_removeMiddle3Adapter"
            ],
        )

        w.step(
            "merge_and_mark",
            MergeAndMarkBams_4_1_3(bams=w.align_and_sort.out,
                                   sampleName=w.sample_name),
        )

        # Temporarily remove GATK4 DepthOfCoverage for performance reasons, see:
        #   https://gatk.broadinstitute.org/hc/en-us/community/posts/360071895391-Speeding-up-GATK4-DepthOfCoverage

        # w.step(
        #     "coverage",
        #     Gatk4DepthOfCoverage_4_1_6(
        #         bam=w.merge_and_mark.out,
        #         reference=w.reference,
        #         intervals=w.gatk_intervals,
        #         omitDepthOutputAtEachBase=True,
        #         # countType="COUNT_FRAGMENTS_REQUIRE_SAME_BASE",
        #         summaryCoverageThreshold=[1, 50, 100, 300, 500],
        #         outputPrefix=w.sample_name,
        #     ),
        # )

        w.step(
            "calculate_performancesummary_genomefile",
            GenerateGenomeFileForBedtoolsCoverage(reference=w.reference),
        )

        w.step(
            "performance_summary",
            PerformanceSummaryGenome_0_1_0(
                bam=w.merge_and_mark.out,
                sample_name=w.sample_name,
                genome_file=w.calculate_performancesummary_genomefile.out,
            ),
        )

        # OUTPUTS
        w.output("out_bam", source=w.merge_and_mark.out)
        w.output("out_fastqc_reports", source=w.fastqc.out)
        # w.output("depth_of_coverage", source=w.coverage.out_sampleSummary)
        w.output(
            "out_performance_summary",
            source=w.performance_summary.performanceSummaryOut,
        )

        return w(**connections)
Example #23
    def tool_modifier(self, tool: Tool, inputs: Dict,
                      hints: Dict[str, str]) -> Tool:

        # Build custom pipeline

        w = WorkflowBuilder(tool.id(),
                            friendly_name=tool.friendly_name(),
                            version=tool.version())

        ins = tool.tool_inputs()
        insdict = {i.id(): i for i in ins}
        fields = set(self.batch.fields)

        inkeys = set(i.id() for i in ins)
        invalid_keys = fields - inkeys
        if len(invalid_keys) > 0:
            raise Exception(
                f"Couldn't create batchtool from fields {', '.join(invalid_keys)} "
                f"as they do not exist on '{tool.id()}'")

        if self.batch.groupby not in inputs:
            raise Exception(
                f"the group_by field '{self.batch.groupby}' was not found in the inputs"
            )

        innode_base = {}

        for i in ins:
            if i.id() in fields:
                continue

            default = i.default
            if isinstance(default, Selector):
                default = None

            innode_base[i.id()] = w.input(i.id(),
                                          i.intype,
                                          default=default,
                                          doc=i.doc)

        raw_groupby_values = inputs[self.batch.groupby]

        duplicate_keys = find_duplicates(raw_groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"There are duplicate group_by ({self.batch.groupby}) keys in the input: "
                + ", ".join(duplicate_keys))

        groupby_values = [
            Validators.transform_identifier_to_be_valid(ident)
            for ident in raw_groupby_values
        ]
        duplicate_keys = find_duplicates(groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"Janis transformed values in the group_by field ({self.batch.groupby}) to be a valid identifiers, "
                f"after this transformation, there were duplicates keys: " +
                ", ".join(duplicate_keys))

        w.input(self.GROUPBY_FIELDNAME, Array(str), value=groupby_values)

        steps_created = []

        stepid_from_gb = lambda gb: f"{gb}_{tool.id()}"

        for gbvalue in groupby_values:

            extra_ins = {}
            for f in fields:
                newkey = f"{f}_{gbvalue}"
                extra_ins[f] = w.input(newkey, insdict[f].intype)

            steps_created.append(
                w.step(stepid_from_gb(gbvalue), tool(**innode_base,
                                                     **extra_ins)))

        for out in tool.tool_outputs():
            output_folders = []
            output_name = out.id()
            if isinstance(tool, WorkflowBase):
                outnode = tool.output_nodes[out.id()]
                output_folders = outnode.output_folder or []

                if outnode.output_name is not None:
                    output_name = outnode.output_name

            for idx, (gbvalue, raw_gbvalue) in enumerate(
                    zip(groupby_values, raw_groupby_values)):
                transformed_inputs = {
                    **inputs,
                    **{f: inputs[f][idx]
                       for f in fields}
                }

                output_folders_transformed = Operator.evaluate_arg(
                    output_folders, transformed_inputs)
                output_name_transformed = Operator.evaluate_arg(
                    output_name, transformed_inputs)

                w.output(
                    f"{gbvalue}_{out.id()}",
                    source=w[stepid_from_gb(gbvalue)][out.id()],
                    output_name=output_name_transformed,
                    output_folder=[
                        raw_gbvalue, *(output_folders_transformed or [])
                    ],
                )

        return w
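For reference, this modifier is driven the same way as in the earlier batching test (Example #6); a minimal sketch, where the workflow `w` and its input values are illustrative:

modifier = BatchPipelineModifier(BatchRunRequirements(["inp"], "inp"))
batched = modifier.tool_modifier(w, {"inp": ["test1", "test2"]}, {})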
Example #24
    "samtoolsview",
    SamToolsView_1_9(sam=w.bwamem.out),
)

# Use `gatk4 MarkDuplicates` on the output of samtoolsview
#   - The output of BWA is query-grouped, so providing "queryname" is sufficient
w.step(
    "markduplicates",
    Gatk4MarkDuplicates_4_1_4(bam=w.samtoolsview.out,
                              assumeSortOrder="queryname"),
)
# Use `gatk4 SortSam` on the output of markduplicates
#   - Use the "coordinate" sortOrder
w.step("sortsam",
       Gatk4SortSam_4_1_4(
           bam=w.markduplicates.out,
           sortOrder="coordinate",
       ))

# Use `gatk4 SetNmMdAndUqTags` to calculate standard tags for BAM
w.step(
    "fix_tags",
    Gatk4SetNmMdAndUqTags_4_1_4(
        bam=w.sortsam.out,
        reference=w.reference,
    ),
)

# Output our final bam
w.output("out_bam", source=w.fix_tags.out)
Example #25
    @staticmethod
    def base_command():
        return "echo"  # non functional tool

    def inputs(self) -> List[ToolInput]:
        return [ToolInput("inp", DataTypeWithSecondary())]

    def outputs(self) -> List[ToolOutput]:
        return [
            ToolOutput("out",
                       DataTypeWithSecondary(),
                       glob=InputSelector("inp"))
        ]


if __name__ == "__main__":
    w = WorkflowBuilder("test_workflow")

    w.input("inp", DataTypeWithSecondary)
    w.step("stp", ToolThatAcceptsAndReturnsSecondary(inp=w.inp))
    w.output("out", source=w.stp)
    w.translate("wdl")

    w2 = WorkflowBuilder("scattered_test_workflow")
    w2.input("inp", Array(DataTypeWithSecondary), default=["path/to/file.ext"])
    w2.step("stp",
            ToolThatAcceptsAndReturnsSecondary(inp=w2.inp),
            scatter="inp")
    w2.output("out", source=w2.stp)
    w2.translate("wdl")
Example #26
    def tool_modifier(self, tool: Tool, inputs: Dict,
                      hints: Dict[str, str]) -> Tool:
        from janis_bioinformatics.data_types import FastaWithDict, Vcf, Bed
        from janis_bioinformatics.tools.illumina import HapPyValidator_0_3_9

        failed_outputs, untyped_outputs = ensure_outputs_are_in_workflow_and_are_compatible(
            tool, self.validation.fields, Vcf())

        if len(failed_outputs) > 0:
            raise Exception(
                f"Some outputs for validation were not found in the tool '{tool.id()}': "
                f"{', '.join(failed_outputs)}")

        if len(untyped_outputs) > 0:
            Logger.critical(
                f"Some outputs for validation from the tool '{tool.id()}' were not "
                f"compatible with VCF: {', '.join(untyped_outputs)}")

        w = WorkflowBuilder(tool.id() + "_validated")

        w.input("validatorReference",
                FastaWithDict,
                value=self.validation.reference)
        w.input("validatorTruthVCF", Vcf, value=self.validation.truthVCF)
        w.input("validatorIntervals",
                Bed(optional=True),
                value=self.validation.intervals)

        inpdict = {
            i.id(): w.input(i.id(), i.intype)
            for i in tool.tool_inputs()
        }
        toolstp = w.step(tool.id(), tool(**inpdict))

        if isinstance(tool, Workflow):
            wf: Workflow = tool
            for o in wf.output_nodes.values():
                w.output(
                    identifier=o.id(),
                    source=toolstp[o.id()],
                    output_folder=o.output_folder,
                    output_name=o.output_name,
                )
        else:
            for o in tool.tool_outputs():
                w.output(identifier=o.id(), source=toolstp[o.id()])

        for o in self.validation.fields:

            sid = "validator_" + o
            valstp = w.step(
                sid,
                HapPyValidator_0_3_9(
                    compareVCF=toolstp[o],
                    # this will generate an input node named validator_{o}_reportPrefix
                    reportPrefix=o,
                    reference=w.validatorReference,
                    truthVCF=w.validatorTruthVCF,
                    intervals=w.validatorIntervals,
                ),
            )

            # Connect all the outputs of the validator to an output
            for vo in valstp.tool.outputs():
                w.output(
                    f"validated_{o}_{vo.id()}",
                    source=valstp[vo.id()],
                    output_folder="validated",
                )

        return w
Example #27
w.step(
    "applybqsr",
    Gatk4ApplyBQSR_4_1_4(
        bam=w.fix_tags.out,
        reference=w.reference,
        recalFile=w.baserecalibration.out_recalibration_report,
    ),
)

# Use HaplotypeCaller as our variant caller
w.step(
    "haplotypecaller",
    Gatk4HaplotypeCaller_4_1_4(bam=w.applybqsr.out_bam, reference=w.reference),
)

w.output("out_recalibration_table",
         source=w.baserecalibration.out_recalibration_report)
w.output("out_bam", source=w.applybqsr.out_bam)
w.output("out_assembledbam", source=w.haplotypecaller.out_bam)
w.output("out_variants", source=w.haplotypecaller.out_vcf)

if __name__ == "__main__":

    import json
    from janis_core.translations.cwl import CwlTranslator

    out = CwlTranslator.translate_workflow_to_all_in_one(w).save()
    with open("/Users/franklinmichael/Desktop/tmp/janis/bcc/vc.json",
              "w+") as f:
        json.dump(out, f)

    # w.translate("cwl", export_path="~/Desktop/tmp/janis/bcc/", to_disk=True)
Example #28
    def outputs(self):
        return [
            ToolOutput("out",  # output id assumed; the snippet was truncated here
                       FastaBwa,
                       glob=InputSelector("reference")),
        ]

    def base_command(self):
        return "echo"


if __name__ == "__main__":
    w = WorkflowBuilder("test_workflow")

    # EXAMPLE 1

    w.input("inp", DataTypeWithSecondary)
    w.step("stp", ToolThatAcceptsAndReturnsSecondary(inp=w.inp))
    w.output("out", source=w.stp)
    w.translate("wdl")

    # EXAMPLE 2

    w2 = WorkflowBuilder("scattered_test_workflow")
    w2.input("inp", Array(DataTypeWithSecondary), default=["path/to/file.ext"])
    w2.step("stp",
            ToolThatAcceptsAndReturnsSecondary(inp=w2.inp),
            scatter="inp")
    w2.output("out", source=w2.stp)
    w2.translate("wdl")

    # EXAMPLE 3

    w3 = WorkflowBuilder("scattered_bioinf_complex")


# --- separate snippet: a basic alignment workflow ---
from janis_bioinformatics.tools.bwa import BwaMemLatest
from janis_bioinformatics.tools.samtools import SamToolsView_1_9
from janis_bioinformatics.tools.gatk4 import Gatk4SortSam_4_1_2

w = WorkflowBuilder("alignmentWorkflow")

# Inputs
w.input("sample_name", String)
w.input("read_group", String)
w.input("fastq", FastqGzPair)
w.input("reference", FastaWithDict)

# Steps
w.step(
    "bwamem",
    BwaMemLatest(reads=w.fastq,
                 readGroupHeaderLine=w.read_group,
                 reference=w.reference),
)
w.step("samtoolsview", SamToolsView_1_9(sam=w.bwamem.out))

w.step(
    "sortsam",
    Gatk4SortSam_4_1_2(bam=w.samtoolsview.out,
                       sortOrder="coordinate",
                       createIndex=True),
)

# Outputs
w.output("out", source=w.sortsam.out)

# --- extended snippet: the same alignment workflow with MarkDuplicates added ---
from janis_bioinformatics.tools.gatk4 import Gatk4MarkDuplicates_4_1_4  # needed for the markduplicates step below

w = WorkflowBuilder("alignmentWorkflow")

# Inputs
w.input("sample_name", String)
w.input("read_group", String)
w.input("fastq", FastqGzPair)
w.input("reference", FastaWithDict)

# Steps
w.step(
    "bwamem",
    BwaMemLatest(reads=w.fastq,
                 readGroupHeaderLine=w.read_group,
                 reference=w.reference),
)
w.step("samtoolsview", SamToolsView_1_9(sam=w.bwamem.out))

w.step(
    "sortsam",
    Gatk4SortSam_4_1_2(bam=w.samtoolsview.out,
                       sortOrder="coordinate",
                       createIndex=True),
)

w.step("markduplicates", Gatk4MarkDuplicates_4_1_4(bam=w.sortsam.out))

# Outputs
w.output("out", source=w.markduplicates.out)