Beispiel #1
0
def test_auto_expose_all(wf):
    """Check the workflow after ``add_step`` is called without ``expose``.

    A tool with one string input and one string output is added as a step.
    The workflow is then expected to hold a matching input, an output whose
    source is ``<tool id>/<output id>``, and a step whose input is sourced
    from the workflow input of the same id.
    """
    tool = CommandLineTool(id='t')
    in_port = tool.add_input(cwl.String(), 'input')
    out_port = tool.add_output(cwl.String(), 'output')

    wf.add_step(tool)

    expected_inputs = [
        WorkflowInput(
            id='input',
            label='input',
            type=Workflow.set_required(Primitive.STRING, False),
        ),
    ]
    assert wf.inputs == expected_inputs

    expected_outputs = [
        WorkflowOutput(
            id='output',
            label='output',
            output_source='{}/{}'.format(tool.id, out_port.id),
            type=Workflow.set_required(Primitive.STRING, False),
        ),
    ]
    assert wf.outputs == expected_outputs

    expected_step = Step(
        id=tool.id,
        in_=[StepInput(in_port.id, source=in_port.id)],
        out=[StepOutput(out_port.id)],
        run=tool,
    )
    assert wf.steps[0] == expected_step
Beispiel #2
0
def test_add_connection_wf_input(wf):
    """Connecting a workflow input to a step input sets the step input source.

    The step is added with ``expose=[]`` and the connection is made
    explicitly through ``add_connection``.
    """
    wf.add_input(cwl.String(), 'input_str')

    tool = CommandLineTool(id='t1')
    tool.add_input(cwl.String(), 'input_str')
    wf.add_step(tool, expose=[])

    wf.add_connection('input_str', 't1.input_str')

    first_step = wf.steps[0]
    expected_in = [StepInput('input_str', source='input_str')]
    assert first_step.in_ == expected_in
Beispiel #3
0
def test_add_connection_wf_output(wf):
    """Connecting a step output to a workflow output sets its output source.

    After ``add_connection('t1.output_str', 'output_str')`` the first
    workflow output is expected to carry ``output_source='t1/output_str'``.
    """
    wf.add_output(cwl.String(), 'output_str')

    tool = CommandLineTool(id='t1')
    tool.add_output(cwl.String(), 'output_str')
    wf.add_step(tool, expose=[])

    wf.add_connection('t1.output_str', 'output_str')

    expected_output = WorkflowOutput(
        id='output_str',
        output_source='t1/output_str',
        type=set_required(Primitive.STRING, False),
    )
    assert wf.outputs[0] == expected_output
Beispiel #4
0
def inputs():
    """Return a mapping of input ids to freshly built ``cwl`` type objects.

    Each call constructs new type instances, one per supported kind
    (primitives, enum, record, array, any, union) plus three ``nonreq`` ids.
    """
    return {
        'input_str': cwl.String(),
        'input_int': cwl.Int(),
        'input_bool': cwl.Bool(),
        'input_float': cwl.Float(),
        'input_enum': cwl.Enum(['a', 'b', 'c']),
        'input_record': cwl.Record({'k1': cwl.String()}),
        'input_array': cwl.Array(cwl.Int()),
        'input_no_type': cwl.Any(),
        'input_union': cwl.Union([cwl.Int(), cwl.String()]),
        'input_nonreq_primitive': cwl.String(),
        'input_nonreq_none': cwl.String(),
        'input_nonreq_object': cwl.Enum(['a', 'b', 'c']),
    }
Beispiel #5
0
def test_add_connection_inner_nodes(wf):
    """Connecting one step's output to another step's input wires both ends.

    After ``add_connection('t1.output', 't2.input')`` the first step is
    expected to list ``output`` among its outputs and the second step's
    ``input`` is expected to be sourced from ``t1/output``.
    """
    producer = CommandLineTool(id='t1')
    consumer = CommandLineTool(id='t2')

    # Both tools get the same pair of string ports.
    for tool in (producer, consumer):
        tool.add_input(cwl.String(), 'input')
        tool.add_output(cwl.String(), 'output')

    # Nothing is exposed on the workflow; the steps are only linked together.
    for tool in (producer, consumer):
        wf.add_step(tool, expose=[])

    wf.add_connection('t1.output', 't2.input')

    first_step, second_step = wf.steps[0], wf.steps[1]
    assert first_step.out == [StepOutput('output')]
    assert second_step.in_ == [StepInput('input', source='t1/output')]
Beispiel #6
0
def test_expose_except(wf):
    """Ports named in ``expose_except`` are not surfaced on the workflow.

    With ``y`` and ``out`` excluded, only input ``x`` is expected on the
    workflow and no outputs at all.
    """
    tool = CommandLineTool(id='test')
    tool.add_input(cwl.Int(default=10, required=True), id='x')
    tool.add_input(cwl.String(), id='y')
    tool.add_output(cwl.File(glob='something', required=True), id='out')

    wf.add_step(tool, expose_except=['y', 'out'])

    expected_inputs = [WorkflowInput(id='x', label='x', type=Primitive.INT)]
    assert wf.inputs == expected_inputs
    assert wf.outputs == []
Beispiel #7
0
from sbg import cwl

# Describe a command-line tool with id 'tool1' whose base command is `grep`.
# NOTE(review): cwl.tool('tool1.cwl', 'w') presumably serializes the tool to
# tool1.cwl when the block exits — confirm against the sbg.cwl documentation.
with cwl.tool('tool1.cwl', 'w') as t:
    t.id = 'tool1'
    t.base_command = ['grep']
    # Same name as the glob of the 'out' port declared below.
    t.stdout = '_output_'
    # Required string input bound at position 0, without shell quoting.
    t.add_input(cwl.String(required=True),
                'pattern',
                label='pattern',
                input_binding=cwl.InputBinding(shell_quote=False, position=0))

    # Required file input bound at position 1, without shell quoting.
    t.add_input(cwl.File(required=True),
                'inFile',
                label='inFile',
                input_binding=cwl.InputBinding(shell_quote=False, position=1))

    # Required file output collected with the glob '_output_'.
    t.add_output(cwl.File(required=True),
                 'out',
                 label='Out',
                 output_binding=cwl.OutputBinding(glob='_output_'))

    t.add_requirement(cwl.Docker(docker_pull='ubuntu:16.04'))
    # required if we want to disable shell_quote
    t.add_requirement(cwl.ShellCommand())
Beispiel #8
0
from sbg import cwl


# First node
@cwl.to_tool(inputs={'x': cwl.String()},
             outputs={'out': cwl.Float(required=True)},
             docker='images.sbgenomics.com/filip_tubic/ubuntu1604py')
def to_float(x):
    """Convert ``x`` with ``float`` and return it under the ``out`` key."""
    converted = float(x)
    return {'out': converted}


# Second node
@cwl.to_tool(inputs={'x': cwl.Float(), 'n': cwl.Int()},
             outputs={'out': cwl.Float()},
             docker='images.sbgenomics.com/filip_tubic/ubuntu1604py')
def times_n(x, n=10):
    """Multiply ``x`` by ``n`` (default 10) and return it under ``out``."""
    product = x * n
    return {'out': product}


# Build a two-step workflow: the `out` port of the first tool feeds the `x`
# port of the second tool.
# NOTE(review): cwl.workflow('wf.cwl', 'w') presumably writes the workflow to
# wf.cwl when the block exits — confirm against the sbg.cwl documentation.
with cwl.workflow('wf.cwl', 'w') as wf:
    # create tools
    t1 = to_float()
    t2 = times_n()

    # steps
    # Only the listed port ids are passed to `expose` for each step.
    wf.add_step(t1, expose=['x'])
    wf.add_step(t2, expose=['n', 'out'])

    # add connections
    # Endpoints are written as '<tool id>.<port id>'.
    wf.add_connection('{}.out'.format(t1.id), '{}.x'.format(t2.id))
Beispiel #9
0
from sbg import cwl
import textwrap

cwl.from_bash(
    label='Example tool',
    inputs=dict(
        HELLO="HELLO WORLD",
        STR=cwl.String(),
        INT=cwl.Int(),
        FLOAT=cwl.Float(),
        BOOL=cwl.Bool(),
        ANY=cwl.Any(),
        FILE=cwl.File(),
        DIR=cwl.Dir(),
        ENUM=cwl.Enum(['opt1', 'opt2']),
        INT_OR_STR=cwl.Union([cwl.Int(), cwl.String()]),
        # with default value
        STR_DEF=cwl.String(default="hello"),
        INT_DEF=cwl.Int(default=123),
        FLOAT_DEF=cwl.Float(default=24.42),
        BOOL_DEF=cwl.Bool(default=True),
        ANY_DEF=cwl.Any(default="whatever"),
        ENUM_DEF=cwl.Enum(['opt1', 'opt2'], default='opt2'),
        INT_OR_STR_DEF=cwl.Union([cwl.Int(), cwl.String()], default=22)),
    outputs=dict(out=cwl.File(glob='stdout')),
    script=textwrap.dedent(r"""
        echo $HELLO
        echo $STR
        echo $INT
        echo $FLOAT
        echo $BOOL
Beispiel #10
0
from sbg import cwl

# Build a two-step workflow from expression tools; the first step is added
# with scatter=['word'] and its output feeds the second step.
# NOTE(review): cwl.workflow('scatter_single.cwl', 'w') presumably writes the
# workflow to scatter_single.cwl on exit — confirm against the sbg.cwl docs.
with cwl.workflow('scatter_single.cwl', 'w') as wf:
    # First tool: the expression returns inputs.word unchanged as `out`.
    t1 = cwl.ExpressionTool('$({"out": inputs.word })', id='expr_tool1')
    t1.add_input(cwl.String(required=True), id='word', label='Word')
    t1.add_output(cwl.String(required=True), id='out', label='Word out')

    # Second tool: the expression lower-cases every element of inputs.word.
    t2 = cwl.ExpressionTool(
        '$({"out": inputs.word.map(function(x){ return x.toLowerCase()}) })',
        id='expr_tool2')
    t2.add_input(cwl.Array(cwl.String(), required=True),
                 id='word',
                 label='Word')
    t2.add_output(cwl.Array(cwl.String(), required=True),
                  id='out',
                  label='Word out')

    # t1 takes a single string but is scattered over `word`, while t2 takes an
    # array of strings.
    wf.add_step(t1, expose=['word'], scatter=['word'])
    wf.add_step(t2, expose=['out'])

    wf.add_connection('expr_tool1.out', 'expr_tool2.word')

    # NOTE(review): presumably needed for the JavaScript expressions and the
    # scattered step above — confirm.
    wf.add_requirement(cwl.InlineJavascript())
    wf.add_requirement(cwl.ScatterFeature())
Beispiel #11
0
from sbg import cwl

# Build a single-step workflow around an expression tool that returns its
# `word` input unchanged as `out`.
# NOTE(review): cwl.workflow('expr_example.cwl', 'w') presumably writes the
# workflow to expr_example.cwl on exit — confirm against the sbg.cwl docs.
with cwl.workflow('expr_example.cwl', 'w') as wf:
    t = cwl.ExpressionTool('$({"out": inputs.word })', id='expr_tool')

    t.add_input(cwl.String(required=True), id='word', label='Word')
    t.add_output(cwl.String(required=True), id='out', label='Word out')

    # No `expose` argument is passed for this step.
    wf.add_step(t)
    # NOTE(review): presumably needed because the tool body is a JavaScript
    # expression — confirm.
    wf.add_requirement(cwl.InlineJavascript())
Beispiel #12
0
    ShellCommand(),
    Resource(),
    Docker(docker_pull='something'), InlineJavascript(),
    EnvVar(EnvironmentDef('foo', 'bar')),
    InitialWorkDir([Dirent('hello world', 'foo.txt')]),
    EnvVar(EnvironmentDef('foo', 'bar'))
])
def test_find_requirement(req, cls):
    """A requirement added to a fresh ``cls()`` is found via its ``class_``."""
    app = cls()
    app.add_requirement(req)

    found = app.find_requirement(req.class_)
    assert found == req


@pytest.mark.parametrize('cls', [CommandLineTool, Workflow])
@pytest.mark.parametrize('type', [
    cwl.String(),
    cwl.Int(),
    cwl.Float(),
    cwl.Bool(),
    cwl.Record(),
    cwl.Enum(),
    cwl.Array(cwl.Int()),
])
def test_get_port(type, cls):
    """``get_port`` returns the objects produced by ``add_input``/``add_output``.

    Runs for every combination of app class and port type listed above.
    """
    app = cls()
    in_port = app.add_input(type, id='in')
    out_port = app.add_output(type, id='out')

    # Look each port up by id and compare it with what was returned on add.
    for port_id, expected in (('in', in_port), ('out', out_port)):
        assert app.get_port(port_id) == expected


@pytest.mark.parametrize('cls', [CommandLineTool, Workflow])
@pytest.mark.parametrize('type', [
    cwl.String(), cwl.Int(), cwl.Float(), cwl.Bool(), cwl.Record(), cwl.Enum(),
    cwl.Array(cwl.Int())
Beispiel #13
0
    tool.unarchive_bundle(bundle, encoded=encoded, postprocess=postprocess)
    assert tool.arguments[0] == arg


def make_f(t, r=inspect.Signature.empty):
    """Build a stub function ``f(x)`` carrying ``to_tool_args`` metadata.

    :param t: type hint recorded for the stub's single argument ``x``.
    :param r: value recorded under the ``'outputs'`` key. Defaults to the
        ``inspect`` "empty" sentinel; ``inspect.Signature.empty`` is the
        public name of the object previously reached as ``inspect._empty``.
    :return: a new no-op function whose ``to_tool_args`` attribute is
        ``{'inputs': {'x': t}, 'outputs': r}``.
    """
    def f(x):
        pass

    f.to_tool_args = {'inputs': dict(x=t), 'outputs': r}
    return f


@pytest.mark.parametrize('hint', [
    cwl.Int(),
    cwl.String(),
    cwl.Float(),
    cwl.Bool(),
    cwl.Record(),
    cwl.Enum(),
])
def test_inputs_from_f(tool, hint):
    """``_inputs_from_f`` reports argument ``x`` with the hint given to ``make_f``."""
    func = make_f(hint)

    first_input = tool._inputs_from_f(func)[0]

    assert first_input['id'] == 'x'
    assert first_input['type'] == hint


@pytest.mark.parametrize('hint', [
    cwl.Int(),
Beispiel #14
0
def outputs():
    """Return a mapping of output ids to freshly built ``cwl`` type objects.

    Covers a plain string, an array of files with a ``*.txt`` glob, and a
    single file with a fixed glob name.
    """
    return {
        'out_str': cwl.String(),
        'out_glob_star': cwl.Array(cwl.File(), glob='*.txt'),
        'out_glob': cwl.File(glob="some_name"),
    }
Beispiel #15
0
def strelka(
    normal_bam: cwl.File(secondary_files='.bai',
                         doc='Normal sample BAM or CRAM file.'),
    tumor_bam: cwl.File(secondary_files='.bai',
                        doc='Tumor sample BAM or CRAM file.',
                        required=True),
    reference_fasta: cwl.File(
        secondary_files='.fai',
        doc='samtools-indexed reference fasta file [required]'),
    indel_candidates: cwl.File(
        doc='Specify a VCF of candidate indel alleles. These alleles are always '
        'evaluated but only reported in the output when they are inferred to '
        'exist in the sample. The VCF must be tabix indexed. All indel alleles'
        ' must be left-shifted/normalized, any unnormalized alleles will be '
        'ignored. This option may be specified more than once, multiple input '
        'VCFs will be merged.',
        default='None') = None,
    forced_gt: cwl.File(
        doc="Specify a VCF of candidate alleles. "
        "These alleles are always evaluated and "
        "reported even if they are unlikely to exist in the "
        "sample. The VCF must be tabix indexed. All indel "
        "alleles must be left-shifted/normalized, any unnormalized "
        "allele will trigger a runtime error. This option may "
        "be specified more than once, multiple input VCFs will "
        "be merged. Note that for any SNVs provided in the VCF, "
        "the SNV site will be reported (and for gVCF, excluded "
        "from block compression), but the specific SNV "
        "alleles are ignored.",
        default='None') = None,
    exome: cwl.Bool(
        doc="Set options for exome or other targeted input: note in "
        "particular that this flag turns off high-depth filters") = False,
    call_regions: cwl.File(
        doc="Optionally provide a bgzip-compressed/tabix-indexed BED "
        "file containing the set of regions to call. No VCF "
        "output will be provided outside of these regions. "
        "The full genome will still be used to estimate statistics "
        "from the input (such as expected depth per chromosome). "
        "Only one BED file may be specified.",
        default='Call the entire genome') = None,
    scan_size_mb: cwl.Int(
        doc="Maximum sequence region size (in megabases) scanned by "
        "each task during genome variant calling. (default: 12)",
        default=12) = 12,
    region: cwl.String(
        doc="Limit the analysis to one or more genome region(s) for "
        "debugging purposes. If this argument is provided multiple"
        " times the union of all specified regions will be analyzed. "
        "All regions must be non-overlapping to get a meaningful "
        "result. Examples: '--region chr20' (whole chromosome), "
        "'--region chr2:100-2000 --region chr3:2500-3000' "
        "(two regions)'. If this option is specified (one or more times) "
        "together with the --callRegions BED file, then all "
        "region arguments will be intersected with the "
        "callRegions BED track.",
        default='None') = None):
    """
    Configure the Strelka somatic workflow in the current directory and run it.

    Builds the command line for
    ``/opt/bin/configureStrelkaSomaticWorkflow.py`` with ``--runDir .``,
    executes it, and then runs ``python runWorkflow.py -m local -j 8``.
    File arguments are read through their ``'path'`` key.

    :param normal_bam: file passed as ``--normalBam``.
    :param tumor_bam: file passed as ``--tumorBam``.
    :param reference_fasta: file passed as ``--referenceFasta``.
    :param indel_candidates: file passed as ``--indelCandidates`` when truthy.
    :param forced_gt: file passed as ``--forcedGT`` when truthy.
    :param exome: adds the ``--exome`` flag when truthy.
    :param call_regions: file passed as ``--callRegions`` when truthy.
    :param scan_size_mb: value passed (as a string) to ``--scanSizeMb``.
    :param region: value passed as ``--region`` when truthy.
    :return: None. Either subprocess helper raises if its command fails.
    """
    # Mandatory part of the configuration command.
    config_cmd = [
        '/opt/bin/configureStrelkaSomaticWorkflow.py',
        '--normalBam', normal_bam['path'],
        '--tumorBam', tumor_bam['path'],
        '--referenceFasta', reference_fasta['path'],
        '--runDir', '.',
    ]

    # Optional VCF inputs are only passed when supplied.
    optional_vcfs = (
        ('--indelCandidates', indel_candidates),
        ('--forcedGT', forced_gt),
    )
    for flag, vcf in optional_vcfs:
        if vcf:
            config_cmd.extend([flag, vcf['path']])

    if exome:
        config_cmd.append('--exome')

    if call_regions:
        config_cmd.extend(['--callRegions', call_regions['path']])

    config_cmd.extend(['--scanSizeMb', str(scan_size_mb)])

    if region:
        config_cmd.extend(['--region', region])

    # The configure step writes into the run directory ('.'); the second
    # command then launches runWorkflow.py from there.
    check_output(config_cmd)
    check_call(['python', 'runWorkflow.py', '-m', 'local', '-j', '8'])