Ejemplo n.º 1
0
def test_set_secondary_files(cls, secondary):
    """Check that ``set_secondary_files`` updates both an input and an output.

    A fresh ``cls`` instance receives one file input (id ``'in'``) and one
    file output (id ``'out'``). After ``set_secondary_files`` is called for
    each id, the port objects returned by ``add_input`` / ``add_output``
    must report ``secondary`` as their ``secondary_files``.

    :param cls: class under test; instantiated with no arguments
    :param secondary: secondary-files value to assign and compare against
    """
    app = cls()
    in_port = app.add_input(cwl.File(), id='in')
    out_port = app.add_output(cwl.File(), id='out')

    # Same call for both ports, input first, then output.
    for port_id in ('in', 'out'):
        app.set_secondary_files(port_id, secondary)

    assert in_port.secondary_files == secondary
    assert out_port.secondary_files == secondary
Ejemplo n.º 2
0
def test_expose_except(wf):
    """Check that ports listed in ``expose_except`` are not exposed.

    A tool with inputs ``x`` and ``y`` and output ``out`` is added as a step
    with ``expose_except=['y', 'out']``. The workflow is then expected to
    have ``x`` as its only input and to have no outputs.

    :param wf: workflow object the step is added to
    """
    tool = CommandLineTool(id='test')
    tool.add_input(cwl.Int(default=10, required=True), id='x')
    tool.add_input(cwl.String(), id='y')
    tool.add_output(cwl.File(glob='something', required=True), id='out')

    wf.add_step(tool, expose_except=['y', 'out'])

    expected_inputs = [WorkflowInput(id='x', label='x', type=Primitive.INT)]
    assert wf.inputs == expected_inputs
    assert wf.outputs == []
Ejemplo n.º 3
0
from sbg import cwl

# Describe a ``grep`` tool; the description is managed by the context manager
# opened on 'tool1.cwl' in 'w' mode.
with cwl.tool('tool1.cwl', 'w') as t:
    # The same name is used for the stdout setting and for the output glob.
    stdout_name = '_output_'

    t.id = 'tool1'
    t.base_command = ['grep']
    t.stdout = stdout_name

    # First input, bound at position 0: the pattern.
    pattern_type = cwl.String(required=True)
    pattern_binding = cwl.InputBinding(shell_quote=False, position=0)
    t.add_input(pattern_type, 'pattern',
                label='pattern', input_binding=pattern_binding)

    # Second input, bound at position 1: the input file.
    file_type = cwl.File(required=True)
    file_binding = cwl.InputBinding(shell_quote=False, position=1)
    t.add_input(file_type, 'inFile',
                label='inFile', input_binding=file_binding)

    # Single output, globbed by the stdout name set above.
    out_type = cwl.File(required=True)
    out_binding = cwl.OutputBinding(glob=stdout_name)
    t.add_output(out_type, 'out', label='Out', output_binding=out_binding)

    t.add_requirement(cwl.Docker(docker_pull='ubuntu:16.04'))
    # ShellCommand is required if we want to disable shell_quote.
    t.add_requirement(cwl.ShellCommand())
Ejemplo n.º 4
0
import pysam
from sbg import cwl


# Declares two file inputs (the BAM with a '.bai' secondary file, and a BED
# file), one file output globbed as 'gc_content.txt', and a docker image.
@cwl.to_tool(
    inputs=dict(
        bam_file=cwl.File(secondary_files=['.bai']),
        bed_file=cwl.File()
    ),
    outputs=dict(out=cwl.File(glob='gc_content.txt')),
    docker='images.sbgenomics.com/filip_tubic/ubuntu1604pysam'
)
def gc_content(bam_file, bed_file):
    """Calculate GC content.

    For every line of the BED file, fetch the reads of the BAM file that
    fall in the region described by the first three whitespace-separated
    columns, and accumulate the total length of their sequences.

    NOTE(review): the visible code appears truncated. ``gc_bases`` is
    initialised but never updated, and nothing is written to
    ``gc_content.txt`` even though it is opened for writing. Confirm against
    the complete source before relying on this description.

    :param bam_file: mapping with a ``'path'`` key locating the BAM file
    :param bed_file: mapping with a ``'path'`` key locating the BED file
    """

    # Both arguments arrive as mappings; only their 'path' entry is used.
    bam_file = bam_file['path']
    bed_file = bed_file['path']
    # NOTE(review): this handle is never closed in the visible code.
    bam = pysam.AlignmentFile(bam_file, 'rb')
    with open('gc_content.txt', 'w') as out:
        with open(bed_file) as bf:
            for line in bf:
                # Columns 0..2 are read as region name, start and end.
                line_parts = line.strip().split()
                chr = line_parts[0]  # NOTE(review): shadows the builtin chr()
                start = int(line_parts[1])
                end = int(line_parts[2])
                read_data = bam.fetch(chr, start, end)
                # Counters are reset for every BED line.
                total_bases = 0
                gc_bases = 0
                for read in read_data:
                    # NOTE(review): presumably query_sequence can be None for
                    # some reads, which would make len() fail -- confirm.
                    seq = read.query_sequence
                    total_bases += len(seq)
Ejemplo n.º 5
0
from sbg import cwl
import textwrap

cwl.from_bash(
    label='Example tool',
    inputs=dict(
        HELLO="HELLO WORLD",
        STR=cwl.String(),
        INT=cwl.Int(),
        FLOAT=cwl.Float(),
        BOOL=cwl.Bool(),
        ANY=cwl.Any(),
        FILE=cwl.File(),
        DIR=cwl.Dir(),
        ENUM=cwl.Enum(['opt1', 'opt2']),
        INT_OR_STR=cwl.Union([cwl.Int(), cwl.String()]),
        # with default value
        STR_DEF=cwl.String(default="hello"),
        INT_DEF=cwl.Int(default=123),
        FLOAT_DEF=cwl.Float(default=24.42),
        BOOL_DEF=cwl.Bool(default=True),
        ANY_DEF=cwl.Any(default="whatever"),
        ENUM_DEF=cwl.Enum(['opt1', 'opt2'], default='opt2'),
        INT_OR_STR_DEF=cwl.Union([cwl.Int(), cwl.String()], default=22)),
    outputs=dict(out=cwl.File(glob='stdout')),
    script=textwrap.dedent(r"""
        echo $HELLO
        echo $STR
        echo $INT
        echo $FLOAT
        echo $BOOL
Ejemplo n.º 6
0
def outputs():
    """Return the output declarations, keyed by output id.

    :return: dict with a string output (``out_str``), an array of files
        globbed by ``'*.txt'`` (``out_glob_star``) and a single file globbed
        by ``'some_name'`` (``out_glob``)
    """
    return {
        'out_str': cwl.String(),
        'out_glob_star': cwl.Array(cwl.File(), glob='*.txt'),
        'out_glob': cwl.File(glob="some_name"),
    }
Ejemplo n.º 7
0
def strelka(
    normal_bam: cwl.File(secondary_files='.bai',
                         doc='Normal sample BAM or CRAM file.'),
    tumor_bam: cwl.File(secondary_files='.bai',
                        doc='Tumor sample BAM or CRAM file.',
                        required=True),
    reference_fasta: cwl.File(
        secondary_files='.fai',
        doc='samtools-indexed reference fasta file [required]'),
    indel_candidates: cwl.File(
        doc='Specify a VCF of candidate indel alleles. These alleles are always '
        'evaluated but only reported in the output when they are inferred to '
        'exist in the sample. The VCF must be tabix indexed. All indel alleles'
        ' must be left-shifted/normalized, any unnormalized alleles will be '
        'ignored. This option may be specified more than once, multiple input '
        'VCFs will be merged.',
        default='None') = None,
    forced_gt: cwl.File(
        doc="Specify a VCF of candidate alleles. "
        "These alleles are always evaluated and "
        "reported even if they are unlikely to exist in the "
        "sample. The VCF must be tabix indexed. All indel "
        "alleles must be left-shifted/normalized, any unnormalized "
        "allele will trigger a runtime error. This option may "
        "be specified more than once, multiple input VCFs will "
        "be merged. Note that for any SNVs provided in the VCF, "
        "the SNV site will be reported (and for gVCF, excluded "
        "from block compression), but the specific SNV "
        "alleles are ignored.",
        default='None') = None,
    exome: cwl.Bool(
        doc="Set options for exome or other targeted input: note in "
        "particular that this flag turns off high-depth filters") = False,
    call_regions: cwl.File(
        doc="Optionally provide a bgzip-compressed/tabix-indexed BED "
        "file containing the set of regions to call. No VCF "
        "output will be provided outside of these regions. "
        "The full genome will still be used to estimate statistics "
        "from the input (such as expected depth per chromosome). "
        "Only one BED file may be specified.",
        default='Call the entire genome') = None,
    scan_size_mb: cwl.Int(
        doc="Maximum sequence region size (in megabases) scanned by "
        "each task during genome variant calling. (default: 12)",
        default=12) = 12,
    region: cwl.String(
        doc="Limit the analysis to one or more genome region(s) for "
        "debugging purposes. If this argument is provided multiple"
        " times the union of all specified regions will be analyzed. "
        "All regions must be non-overlapping to get a meaningful "
        "result. Examples: '--region chr20' (whole chromosome), "
        "'--region chr2:100-2000 --region chr3:2500-3000' "
        "(two regions)'. If this option is specified (one or more times) "
        "together with the --callRegions BED file, then all "
        "region arguments will be intersected with the "
        "callRegions BED track.",
        default='None') = None):
    """
    Configure a Strelka somatic workflow in the current directory and run it.

    The configuration script is invoked with the arguments assembled below,
    using ``.`` as the run directory; afterwards ``runWorkflow.py`` is run
    with ``-m local -j 8``. Either subprocess exiting with a non-zero status
    raises ``subprocess.CalledProcessError``.

    :param normal_bam: mapping with a ``'path'`` key, passed as ``--normalBam``
    :param tumor_bam: mapping with a ``'path'`` key, passed as ``--tumorBam``
    :param reference_fasta: mapping with a ``'path'`` key, passed as
        ``--referenceFasta``
    :param indel_candidates: optional mapping with a ``'path'`` key; when
        truthy it is passed as ``--indelCandidates``
    :param forced_gt: optional mapping with a ``'path'`` key; when truthy it
        is passed as ``--forcedGT``
    :param exome: when truthy, the ``--exome`` flag is added
    :param call_regions: optional mapping with a ``'path'`` key; when truthy
        it is passed as ``--callRegions``
    :param scan_size_mb: always passed (as a string) via ``--scanSizeMb``
    :param region: when truthy, passed as ``--region``
    :return: None
    """
    # Mandatory part of the configuration command.
    configure_cmd = [
        '/opt/bin/configureStrelkaSomaticWorkflow.py',
        '--normalBam', normal_bam['path'],
        '--tumorBam', tumor_bam['path'],
        '--referenceFasta', reference_fasta['path'],
        '--runDir', '.',
    ]

    # Optional arguments are appended only when a truthy value was supplied.
    if indel_candidates:
        configure_cmd.extend(['--indelCandidates', indel_candidates['path']])
    if forced_gt:
        configure_cmd.extend(['--forcedGT', forced_gt['path']])
    if exome:
        configure_cmd.append('--exome')
    if call_regions:
        configure_cmd.extend(['--callRegions', call_regions['path']])

    configure_cmd.extend(['--scanSizeMb', str(scan_size_mb)])

    if region:
        configure_cmd.extend(['--region', region])

    # Step 1: run the configuration script (its captured output is discarded).
    check_output(configure_cmd)
    # Step 2: execute the workflow script with a fixed local job count.
    check_call(['python', 'runWorkflow.py', '-m', 'local', '-j', '8'])
Ejemplo n.º 8
0
from sbg import cwl
from tools.doc import TOOL_DOC
from subprocess import check_call, check_output


@cwl.to_tool(outputs=dict(
    stats_log=cwl.File(glob='results/stats/runStats.tsv'),
    somatic_snvs=cwl.File(glob='results/variants/somatic.snvs.vcf.gz',
                          secondary_files='.tbi',
                          doc="All somatic SNVs inferred in the tumor sample"),
    somatic_snvs_tbi=cwl.File(glob='results/variants/somatic.snvs.vcf.gz.tbi'),
    somatic_indels=cwl.File(
        glob='results/variants/somatic.indels.vcf.gz',
        secondary_files='.tbi',
        doc="All somatic Indels inferred in the tumor sample"),
    somatic_indels_tbi=cwl.File(
        glob='results/variants/somatic.indels.vcf.gz.tbi')),
             docker='images.sbgenomics.com/gavrilo_andric/strelka:1',
             label="Strelka 2.9.7")
def strelka(
    normal_bam: cwl.File(secondary_files='.bai',
                         doc='Normal sample BAM or CRAM file.'),
    tumor_bam: cwl.File(secondary_files='.bai',
                        doc='Tumor sample BAM or CRAM file.',
                        required=True),
    reference_fasta: cwl.File(
        secondary_files='.fai',
        doc='samtools-indexed reference fasta file [required]'),
    indel_candidates: cwl.File(
        doc='Specify a VCF of candidate indel alleles. These alleles are always '
        'evaluated but only reported in the output when they are inferred to '