Example #1
    def type_to_janis_type(self, typ: cli_types.CliType,
                           optional: bool) -> janis.DataType:

        if isinstance(typ, cli_types.CliFile):
            return janis.File(optional=optional)
        elif isinstance(typ, cli_types.CliDir):
            return janis.Directory(optional=optional)
        elif isinstance(typ, cli_types.CliString):
            return janis.String(optional=optional)
        elif isinstance(typ, cli_types.CliFloat):
            return janis.Float(optional=optional)
        elif isinstance(typ, cli_types.CliInteger):
            return janis.Int(optional=optional)
        elif isinstance(typ, cli_types.CliBoolean):
            return janis.Boolean(optional=optional)
        elif isinstance(typ, cli_types.CliEnum):
            return janis.String(optional=optional)
        elif isinstance(typ, cli_types.CliList):
            # TODO: how is Array<String?> represented?
            inner = self.type_to_janis_type(typ.value, optional=False)
            return janis.Array(inner, optional=optional)

        elif isinstance(typ, cli_types.CliTuple):
            return self.type_to_janis_type(
                cli_types.CliType.lowest_common_type(typ.values), optional=False)
        else:
            raise Exception(f"Invalid type {typ}!")
Example #2
    def parse_wdl_type(self, t: WDL.Type.Base):
        optional = t.optional
        if isinstance(t, WDL.Type.Int):
            return j.Int(optional=optional)
        elif isinstance(t, WDL.Type.String):
            return j.String(optional=optional)
        elif isinstance(t, WDL.Type.Float):
            return j.Float(optional=optional)
        elif isinstance(t, WDL.Type.Boolean):
            return j.Boolean(optional=optional)
        elif isinstance(t, WDL.Type.File):
            return j.File(optional=optional)
        elif isinstance(t, WDL.Type.Directory):
            return j.Directory(optional=optional)
        elif isinstance(t, WDL.Type.Array):
            return j.Array(self.parse_wdl_type(t.item_type), optional=optional)

        raise Exception(
            f"Didn't handle WDL type conversion for '{t}' ({type(t)})")
Example #3
"""
How to create an empty directory, using the value of an input
"""

import janis_core as j

CLT = j.CommandToolBuilder(
    tool="create_initial_stuff",
    base_command=["ls", "*"],
    version="dev",
    container="ubuntu:latest",
    inputs=[
        j.ToolInput("name_of_output_folder",
                    j.String(optional=True),
                    default="some-string")
    ],
    outputs=[j.ToolOutput("out_dir", j.Directory, selector="some-string")],
    directories_to_create=[j.InputSelector("name_of_output_folder")],
    files_to_create=[(
        j.StringFormatter("{dir}/file.txt",
                          dir=j.InputSelector("name_of_output_folder")),
        "contents of file",
    )],
)

if __name__ == "__main__":
    CLT().translate("cwl")
Example #4
 def constructor(self):
     self.input("inp", j.String(optional=True), default="Hello, world!")
     self.step("hello", Echo(inp=self.inp))
     self.output("out", source=self.hello)
Example #5
    def from_cwl_inner_type(self, cwl_type) -> j.DataType:
        if isinstance(cwl_type, str):
            optional = "?" in cwl_type
            cwl_type = cwl_type.replace("?", "")
            array_count = 0
            while cwl_type.endswith("[]"):
                array_count += 1
                cwl_type = cwl_type[:-2]

            if cwl_type == "File":
                inner = j.File
            elif cwl_type == "Directory":
                inner = j.Directory
            elif cwl_type == "string":
                inner = j.String
            elif cwl_type == "int":
                inner = j.Int
            elif cwl_type == "float":
                inner = j.Float
            elif cwl_type == "boolean":
                inner = j.Boolean
            elif cwl_type == "stdout":
                inner = j.Stdout
            elif cwl_type == "stderr":
                inner = j.Stderr
            elif cwl_type == "Any":
                inner = j.String
            elif cwl_type == "long":
                inner = j.Int
            else:
                raise Exception(f"Can't detect type {cwl_type}")
            # wrap in Array once for each trailing "[]" stripped above
            dtype = inner(optional=optional)
            for _ in range(array_count):
                dtype = j.Array(dtype)
            return dtype

        elif isinstance(cwl_type, list):
            optional = None
            types = []
            for c in cwl_type:
                if c == "null":
                    optional = True
                else:
                    types.append(self.ingest_cwl_type(c, []))

            if len(types) == 1:
                if optional is not None:
                    types[0].optional = optional
                return types[0]
            else:
                from janis_core.types.common_data_types import UnionType

                if optional is not None:
                    for inner in types:
                        inner.optional = optional

                return UnionType(*types)

        elif isinstance(cwl_type, (
                self.cwlgen.CommandInputArraySchema,
                self.cwlgen.InputArraySchema,
                self.cwlgen.CommandOutputArraySchema,
                self.cwlgen.OutputArraySchema,
        )):
            return j.Array(self.from_cwl_inner_type(cwl_type.items))
        elif isinstance(cwl_type, self.cwlgen.InputEnumSchema):
            return j.String()

        else:
            raise Exception(f"Can't parse type {type(cwl_type).__name__}")
Example #6
 def inputs(self) -> List[j.ToolInput]:
     return [
         j.ToolInput("truthVCF", Vcf(), position=1),
         j.ToolInput("compareVCF", Vcf(), position=2),
         j.ToolInput(
             "reportPrefix",
             j.Filename(),
             prefix="--report-prefix",
             doc="(-o)  Filename prefix for report output.",
         ),
         j.ToolInput(
             "reference",
             FastaWithDict(),
             prefix="--reference",
             doc="(-r)  Specify a reference file.",
         ),
         j.ToolInput(
             "intervals",
             Bed(optional=True),
             prefix="--target-regions",
             doc=
             "(-T)  Restrict analysis to given (dense) regions (using -T in bcftools).",
         ),
         j.ToolInput(
             "version",
             j.Boolean(optional=True),
             prefix="--version",
             doc="(-v) Show version number and exit.",
         ),
         j.ToolInput(
             "scratchPrefix",
             j.String(optional=True),
             prefix="--scratch-prefix",
             doc="Directory for scratch files.",
         ),
         j.ToolInput(
             "keepScratch",
             j.String(optional=True),
             prefix="--keep-scratch",
             doc=
             "Filename prefix for scratch report output. Annotation format in input VCF file.",
         ),
         j.ToolInput(
             "falsePositives",
             Bed(optional=True),
             prefix="--false-positives",
             doc=
             "(-f)  False positive / confident call regions (.bed or .bed.gz). "
             "Calls outside these regions will be labelled as UNK.",
         ),
         j.ToolInput(
             "stratification",
             Tsv(optional=True),
             prefix="--stratification",
             doc=
             " Stratification file list (TSV format -- first column is region name, "
             "second column is file name).",
         ),
         j.ToolInput(
             "stratificationRegion",
             j.String(optional=True),
             prefix="--stratification-region",
             doc=
             "Add single stratification region, e.g. --stratification-region TEST:test.bed",
         ),
         j.ToolInput(
             "stratificationFixchr",
             j.String(optional=True),
             prefix="--stratification-fixchr",
             doc=" Add chr prefix to stratification files if necessary",
         ),
         j.ToolInput(
             "writeVcf",
             j.Boolean(optional=True),
             prefix="--write-vcf",
             doc="(-V) Write an annotated VCF.",
         ),
         j.ToolInput(
             "writeCounts",
             j.Boolean(optional=True),
             prefix="--write-counts",
             doc="(-X) Write advanced counts and metrics.",
         ),
         j.ToolInput(
             "noWriteCounts",
             j.Boolean(optional=True),
             prefix="--no-write-counts",
             doc="Do not write advanced counts and metrics.",
         ),
         j.ToolInput(
             "outputVtc",
             j.Boolean(optional=True),
             prefix="--output-vtc",
             doc=
             "Write VTC field in the final VCF which gives the counts each position has contributed to.",
         ),
         j.ToolInput(
             "preserveInfo",
             j.Boolean(optional=True),
             prefix="--preserve-info",
             doc=
             "When using XCMP, preserve and merge the INFO fields in truth and query. "
             "Useful for ROC computation.",
         ),
         j.ToolInput(
             "roc",
             j.String(optional=True),
             prefix="--roc",
             doc=
             "Select a feature to produce a ROC on (INFO feature, QUAL, GQX, ...).",
         ),
         j.ToolInput(
             "noRoc",
             j.Boolean(optional=True),
             prefix="--no-roc",
             doc=
             "Disable ROC computation and only output summary statistics for more concise output.",
         ),
         j.ToolInput(
             "rocRegions",
             j.String(optional=True),
             prefix="--roc-regions",
             doc=" Select a list of regions to compute ROCs in. By default, "
             "only the '*' region will produce ROC output (aggregate variant counts).",
         ),
         j.ToolInput(
             "rocFilter",
             j.String(optional=True),
             prefix="--roc-filter",
             doc=" Select a filter to ignore when making ROCs.",
         ),
         j.ToolInput(
             "rocDelta",
             j.Int(optional=True),
             prefix="--roc-delta",
             doc=" Minimum spacing between ROC QQ levels.",
         ),
         j.ToolInput(
             "ciAlpha",
             j.Int(optional=True),
             prefix="--ci-alpha",
             doc=
             "Confidence level for Jeffrey's CI for recall, precision and fraction of non-assessed calls.",
         ),
         j.ToolInput(
             "noJson",
             j.Boolean(optional=True),
             prefix="--no-json",
             doc="Disable JSON file output.",
         ),
         # j.ToolInput("location", Array(j.String(), optional=True), prefix="--location", separator=",",
         #           doc="(-l)  Comma-separated list of locations [use naming after preprocessing], "
         #               "when not specified will use whole VCF."),
         j.ToolInput(
             "passOnly",
             j.Boolean(optional=True),
             prefix="--pass-only",
             doc="Keep only PASS variants.",
         ),
         # j.ToolInput("filtersOnly", Array(j.String(), optional=True), prefix="--filters-only", separator=",",
         #           doc=" Specify a comma-separated list of filters to apply "
         #               "(by default all filters are ignored / passed on."),
         j.ToolInput(
             "restrictRegions",
             j.Boolean(optional=True),
             prefix="--restrict-regions",
             doc=
             "(-R)  Restrict analysis to given (sparse) regions (using -R in bcftools).",
         ),
         j.ToolInput(
             "leftshift",
             j.Boolean(optional=True),
             prefix="--leftshift",
             doc="(-L) Left-shift variants safely.",
         ),
         j.ToolInput(
             "noLeftshift",
             j.Boolean(optional=True),
             prefix="--no-leftshift",
             doc="Do not left-shift variants safely.",
         ),
         j.ToolInput(
             "decompose",
             j.Boolean(optional=True),
             prefix="--decompose",
             doc=
             "Decompose variants into primitives. This results in more granular counts.",
         ),
         j.ToolInput(
             "noDecompose",
             j.Boolean(optional=True),
             prefix="--no-decompose",
             doc="(-D) Do not decompose variants into primitives.",
         ),
         j.ToolInput(
             "bcftoolsNorm",
             j.Boolean(optional=True),
             prefix="--bcftools-norm",
             doc="Enable preprocessing through bcftools norm -c x -D "
             "(requires external preprocessing to be switched on).",
         ),
         j.ToolInput(
             "fixchr",
             j.Boolean(optional=True),
             prefix="--fixchr",
             doc=
             "Add chr prefix to VCF records where necessary (default: auto, attempt to match reference).",
         ),
         j.ToolInput(
             "noFixchr",
             j.Boolean(optional=True),
             prefix="--no-fixchr",
             doc=
             "Do not add chr prefix to VCF records (default: auto, attempt to match reference).",
         ),
         j.ToolInput(
             "bcf",
             j.Boolean(optional=True),
             prefix="--bcf",
             doc=
             "Use BCF internally. This is the default when the input file is in BCF format already. "
             "Using BCF can speed up temp file access, but may fail for VCF files that have broken "
             "headers or records that don't comply with the header.",
         ),
         j.ToolInput(
             "somatic",
             j.Boolean(optional=True),
             prefix="--somatic",
             doc=
             "Assume the input file is a somatic call file and squash all columns into one, "
             "putting all FORMATs into INFO + use half genotypes (see also --set-gt). "
             "This will replace all sample columns and replace them with a single one. "
             "This is used to treat Strelka somatic files Possible values for this parameter: "
             "half / hemi / het / hom / half to assign one of the following genotypes to the "
             "resulting sample: 1 | 0/1 | 1/1 | ./1. This will replace all sample columns and "
             "replace them with a single one.",
         ),
         j.ToolInput(
             "setGT",
             j.Boolean(optional=True),
             prefix="--set-gt",
             doc=
             "This is used to treat Strelka somatic files Possible values for this parameter: "
             "half / hemi / het / hom / half to assign one of the following genotypes to the resulting "
             "sample: 1 | 0/1 | 1/1 | ./1. "
             "This will replace all sample columns and replace them with a single one.",
         ),
         j.ToolInput(
             "gender",
             j.String(optional=True),
             prefix="--gender",
             doc=
             "({male,female,auto,none})  Specify gender. This determines how haploid calls on chrX "
             "get treated: for male samples, all non-ref calls (in the truthset only when "
             "running through hap.py) are given a 1/1 genotype.",
         ),
         j.ToolInput(
             "preprocessTruth",
             j.Boolean(optional=True),
             prefix="--preprocess-truth",
             doc="Preprocess truth file with same settings as query "
             "(default is to accept truth in original format).",
         ),
         j.ToolInput(
             "usefilteredTruth",
             j.Boolean(optional=True),
             prefix="--usefiltered-truth",
             doc="Use filtered variant calls in truth file "
             "(by default, only PASS calls in the truth file are used)",
         ),
         j.ToolInput(
             "preprocessingWindowSize",
             j.Boolean(optional=True),
             prefix="--preprocessing-window-size",
             doc=" Preprocessing window size (variants further apart than "
             "that size are not expected to interfere).",
         ),
         j.ToolInput(
             "adjustConfRegions",
             j.Boolean(optional=True),
             prefix="--adjust-conf-regions",
             doc=
             " Adjust confident regions to include variant locations. Note this will only include "
             "variants that are included in the CONF regions already when viewing with bcftools; "
             "this option only makes sure insertions are padded correctly in the CONF regions (to "
             "capture these, both the base before and after must be contained in the bed file).",
         ),
         j.ToolInput(
             "noAdjustConfRegions",
             j.Boolean(optional=True),
             prefix="--no-adjust-conf-regions",
             doc=" Do not adjust confident regions for insertions.",
         ),
         j.ToolInput(
             "noHaplotypeComparison",
             j.Boolean(optional=True),
             prefix="--no-haplotype-comparison",
             doc=
             "(--unhappy)  Disable haplotype comparison (only count direct GT matches as TP).",
         ),
         j.ToolInput(
             "windowSize",
             j.Int(optional=True),
             prefix="--window-size",
             doc=
             "(-w)  Minimum distance between variants such that they fall into the same superlocus.",
         ),
         j.ToolInput(
             "xcmpEnumerationThreshold",
             j.Int(optional=True),
             prefix="--xcmp-enumeration-threshold",
             doc=
             " Enumeration threshold / maximum number of sequences to enumerate per block.",
         ),
         j.ToolInput(
             "xcmpExpandHapblocks",
             j.String(optional=True),
             prefix="--xcmp-expand-hapblocks",
             doc=
             " Expand haplotype blocks by this many basepairs left and right.",
         ),
         j.ToolInput(
             "threads",
             j.Int(optional=True),
             prefix="--threads",
             default=j.CpuSelector(),
             doc="Number of threads to use. Comparison engine to use.",
         ),
         # j.ToolInput("engineVcfevalPath", j.String(optional=True), prefix="--engine-vcfeval-path",
         #           doc=" This parameter should give the path to the \"rtg\" executable. "
         #               "The default is /opt/hap.py/lib/python27/Haplo/../../../libexec/rtg- tools-install/rtg"),
         j.ToolInput(
             "engine",
             j.String(optional=True),
             prefix="--engine",
             doc=
             " {xcmp,vcfeval,scmp-somatic,scmp-distance} Comparison engine to use.",
         ),
         j.ToolInput(
             "engineVcfevalTemplate",
             j.String(optional=True),
             prefix="--engine-vcfeval-template",
             doc=
             " Vcfeval needs the reference sequence formatted in its own file format (SDF -- run rtg "
             "format -o ref.SDF ref.fa). You can specify this here to save time when running hap.py "
             "with vcfeval. If no SDF folder is specified, hap.py will create a temporary one.",
         ),
         j.ToolInput(
             "scmpDistance",
             j.Int(optional=True),
             prefix="--scmp-distance",
             doc=
             " For distance-based matching, this is the distance between variants to use.",
         ),
         j.ToolInput(
             "logfile",
             j.Filename(suffix="-log", extension=".txt"),
             prefix="--logfile",
             doc="Write logging information into file rather than to stderr",
         ),
         j.ToolInput(
             "verbose",
             j.Boolean(optional=True),
             prefix="--verbose",
             doc="Raise logging level from warning to info.",
         ),
         j.ToolInput(
             "quiet",
             j.Boolean(optional=True),
             prefix="--quiet",
             doc="Set logging level to output errors only.",
         ),
     ]
    @staticmethod
    def code_block(inp: str) -> dict:
        # list splits "abc" into ["a", "b", "c"]
        return {"out": inp.upper()}

    def outputs(self):
        return [j.TOutput("out", j.String())]
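
# The workflow below also calls a `SplitText` tool whose definition isn't shown in
# this fragment. A minimal sketch of what it might look like, assuming the same
# code_block/outputs pattern as above and janis_core's PythonTool base class
# (the original definition may differ):
class SplitText(j.PythonTool):
    @staticmethod
    def code_block(inp: str) -> dict:
        # list("abc") splits the string into ["a", "b", "c"]
        return {"out": list(inp)}

    def outputs(self):
        # the 'split' step below is described as producing Array(String)
        return [j.TOutput("out", j.Array(j.String()))]

    def id(self) -> str:
        return "SplitText"

    def version(self):
        return "dev"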


# Now that we have our tool definitions, we can construct our workflow

# Workflow definition called "split_and_upper"
wf = j.WorkflowBuilder("split_and_upper")

# The workflow input, which is of type STRING
wf.input("input_text", j.String())

# Create a step called 'split', calling `SplitText`, which takes a string input called 'inp'.
# The output can be accessed by `wf.split.out`, which will have type Array(String).
wf.step("split", SplitText(inp=wf.input_text))

# Create a step called 'toupper', calling the `ToUpper` tool, which has a single
# input called 'inp'. Because we're operating on an array, we'll use the `scatter="inp"`
# argument to scatter over ToUpper's input called "inp".
# The output will be implicitly _gathered_, and hence `wf.toupper.out` will have type Array(String).
wf.step("toupper", ToUpper(inp=wf.split.out), scatter="inp")

# We'll take the gathered output and use the janis `j.standard.JoinOperator(iterable, separator)`
# to join the array of strings.
wf.output("out", source=j.standard.JoinOperator(wf.toupper.out, ""))
"""
Create a step whose value depends on multiplying an input number
by the length of a list input. We construct the combination of operators
using fairly natural Python syntax.
"""

import janis_core as j
from janis_unix.tools import Echo

w = j.WorkflowBuilder("multiply")

w.input("number_input", int)
w.input("list_input", j.Array(j.String()))

# The input expression:
#   (w.number_input * w.list_input.length()).as_str()
# is equivalent to the following manually constructed:
#   j.AsStringOperator(j.MultiplyOperator(w.number_input, j.LengthOperator(w.list_input)))

w.step("multiply", Echo(inp=(w.number_input * w.list_input.length()).as_str()))

w.output("out",
         source=w.number_input.as_str() + w.multiply.out.contents() +
         "my-output")

if __name__ == "__main__":
    w.translate("wdl")
from typing import List, Optional

import janis_core as j

# Although not preferred, you can use Python primitives and typing
# annotations in place of the Janis types:

#   str     - j.String()
#   int     - j.Int()
#   float   - j.Float()
#   bool    - j.Boolean()
#
#   typing.Optional[str]    - j.String(optional=True)
#   typing.List[str]        - j.Array(j.String())

workflow = j.WorkflowBuilder("typing_tests")

workflow.input("my_string_input_1", j.String())
workflow.input("my_string_input_2", str)

workflow.input("my_optional_str_input_1", j.String(optional=True))
workflow.input("my_optional_str_input_2", Optional[str])

workflow.input("my_list_of_strings_input_1", j.Array(j.String()))
workflow.input("my_list_of_strings_input_2", List[str])

workflow.input("my_optional_list_of_strings_1",
               j.Array(j.String(), optional=True))
workflow.input("my_optional_list_of_strings_2", Optional[List[str]])

workflow.input("my_list_of_optional_strings_1",
               j.Array(j.String(optional=True)))
workflow.input("my_list_of_optional_strings_2", List[Optional[str]])