예제 #1
0
def rnaseq(samples):
    """Build the ordered step list and final output keys for RNA-seq.

    Every step is created with the ``s`` helper and every step output with
    ``cwlout``; both are defined outside this function.  Steps are created in
    this order: ``prep_samples``, ``process_alignment``,
    ``rnaseq_quantitate``, ``pipeline_summary`` and ``multiqc_summary``.

    Args:
        samples: Not read by this function.

    Returns:
        A ``(steps, final_outputs)`` tuple, where ``steps`` is the list of
        step objects in the order above and ``final_outputs`` is a list of
        key paths.
    """
    # --- sample preparation ---
    prep_step = s("prep_samples", "multi-parallel",
                  [["files"], dd.get_keys("sample_name")],
                  [cwlout(["files"], "File")],
                  "bcbio",
                  programs=["picard"])

    # --- alignment ---
    align_inputs = [
        ["files"],
        ["reference", "fasta", "base"],
        ["analysis"],
        ["rgnames", "pl"],
        ["rgnames", "sample"],
        ["rgnames", "pu"],
        ["rgnames", "lane"],
        ["rgnames", "rg"],
        ["rgnames", "lb"],
        ["reference", "aligner", "indexes"],
        ["config", "algorithm", "aligner"],
        ["genome_resources", "rnaseq", "transcripts"],
        ["config", "algorithm", "quality_format"],
    ]
    align_outputs = [
        cwlout(["work_bam"], "File", [".bai"]),
        cwlout(["align_bam"], "File", [".bai"]),
    ]
    # The two trailing arguments are passed positionally, as in the other
    # positional call sites of ``s``.
    align_step = s("process_alignment", "multi-parallel",
                   align_inputs, align_outputs,
                   "bcbio-vc",
                   ["aligner", "samtools", "sambamba", "seqtk"],
                   {"files": 1.5})

    # --- quantitation ---
    quant_inputs = [
        ["files"],
        dd.get_keys("work_bam"),
        dd.get_keys("gtf_file"),
        dd.get_keys("ref_file"),
        dd.get_keys("genome_build"),
    ]
    quant_outputs = [
        cwlout(dd.get_keys("count_file"), "File"),
        cwlout(dd.get_keys("sailfish_dir"), "File"),
    ]
    quant_step = s("rnaseq_quantitate", "multi-parallel",
                   quant_inputs, quant_outputs,
                   "bcbio",
                   programs=["sailfish"],
                   disk={"files": 1.5})

    # --- quality control ---
    summary_inputs = [
        ["align_bam"],
        ["analysis"],
        ["reference", "fasta", "base"],
        ["config", "algorithm", "qc"],
    ]
    summary_outputs = [
        cwlout(["summary", "qc", "samtools"], "File"),
        cwlout(["summary", "qc", "fastqc"], "File"),
    ]
    summary_step = s("pipeline_summary", "multi-parallel",
                     summary_inputs, summary_outputs,
                     "bcbio", ["samtools", "fastqc"])

    multiqc_inputs = [
        ["genome_build"],
        ["summary", "qc", "samtools"],
        ["summary", "qc", "fastqc"],
        ["reference", "fasta", "base"],
        ["config", "algorithm", "coverage_interval"],
    ]
    multiqc_outputs = [cwlout(["summary", "multiqc"], ["File", "null"])]
    multiqc_step = s("multiqc_summary", "multi-combined",
                     multiqc_inputs, multiqc_outputs, "bcbio")

    steps = [prep_step, align_step, quant_step, summary_step, multiqc_step]
    final_outputs = [
        dd.get_keys("work_bam"),
        dd.get_keys("sailfish_dir"),
        ["summary", "multiqc"],
    ]
    return steps, final_outputs
예제 #2
0
def rnaseq(samples):
    """Return the RNA-seq workflow as ``(steps, final_outputs)``.

    The step list is accumulated one ``s(...)`` call at a time; ``s``,
    ``cwlout`` and ``dd`` are provided by the surrounding module.

    Args:
        samples: Not read by this function.

    Returns:
        tuple: ``steps`` (a list holding the five step objects in creation
        order) and ``final_outputs`` (a list of key paths).
    """
    steps = []

    # Sample preparation.
    steps.append(
        s("prep_samples", "multi-parallel",
          [["files"], dd.get_keys("sample_name")],
          [cwlout(["files"], "File")],
          "bcbio", programs=["picard"]))

    # Alignment: produces both work_bam and align_bam, each with a ".bai"
    # entry passed as the third cwlout argument.
    alignment_keys = [
        ["files"],
        ["reference", "fasta", "base"],
        ["analysis"],
        ["rgnames", "pl"],
        ["rgnames", "sample"],
        ["rgnames", "pu"],
        ["rgnames", "lane"],
        ["rgnames", "rg"],
        ["rgnames", "lb"],
        ["reference", "aligner", "indexes"],
        ["config", "algorithm", "aligner"],
        ["genome_resources", "rnaseq", "transcripts"],
        ["config", "algorithm", "quality_format"],
    ]
    alignment_results = [
        cwlout(["work_bam"], "File", [".bai"]),
        cwlout(["align_bam"], "File", [".bai"]),
    ]
    steps.append(
        s("process_alignment", "multi-parallel",
          alignment_keys, alignment_results,
          "bcbio-vc", ["aligner", "samtools", "sambamba", "seqtk"],
          {"files": 1.5}))

    # Quantitation.
    quantitation_keys = [
        ["files"],
        dd.get_keys("work_bam"),
        dd.get_keys("gtf_file"),
        dd.get_keys("ref_file"),
        dd.get_keys("genome_build"),
    ]
    quantitation_results = [
        cwlout(dd.get_keys("count_file"), "File"),
        cwlout(dd.get_keys("sailfish_dir"), "File"),
    ]
    steps.append(
        s("rnaseq_quantitate", "multi-parallel",
          quantitation_keys, quantitation_results,
          "bcbio", programs=["sailfish"],
          disk={"files": 1.5}))

    # Quality control summary ("multi-parallel").
    per_sample_qc_keys = [
        ["align_bam"],
        ["analysis"],
        ["reference", "fasta", "base"],
        ["config", "algorithm", "qc"],
    ]
    per_sample_qc_results = [
        cwlout(["summary", "qc", "samtools"], "File"),
        cwlout(["summary", "qc", "fastqc"], "File"),
    ]
    steps.append(
        s("pipeline_summary", "multi-parallel",
          per_sample_qc_keys, per_sample_qc_results,
          "bcbio", ["samtools", "fastqc"]))

    # MultiQC summary ("multi-combined").
    multiqc_keys = [
        ["genome_build"],
        ["summary", "qc", "samtools"],
        ["summary", "qc", "fastqc"],
        ["reference", "fasta", "base"],
        ["config", "algorithm", "coverage_interval"],
    ]
    steps.append(
        s("multiqc_summary", "multi-combined",
          multiqc_keys,
          [cwlout(["summary", "multiqc"], ["File", "null"])],
          "bcbio"))

    final_outputs = [
        dd.get_keys("work_bam"),
        dd.get_keys("sailfish_dir"),
        ["summary", "multiqc"],
    ]
    return steps, final_outputs
예제 #3
0
def rnaseq(samples):
    """Build the record-based RNA-seq workflow definition.

    Steps are declared with ``s`` and their outputs with ``cwlout`` (both
    defined outside this function).  Several steps exchange "record" outputs
    (``prep_rec``, ``trim_rec``, ``qc_rec``, ``qcout_rec``) by name.

    Args:
        samples: Not read by this function.

    Returns:
        A ``(steps, final_outputs)`` tuple: the seven step objects in
        creation order, and a list of three key paths.
    """
    # --- preparation and trimming ---
    prepare_inputs = [
        ["files"],
        dd.get_keys("sample_name"),
        dd.get_keys("ref_file"),
        dd.get_keys("genome_build"),
        dd.get_keys("gtf_file"),
        ["analysis"],
        ["rgnames", "pl"],
        ["rgnames", "pu"],
        ["rgnames", "lane"],
        ["rgnames", "rg"],
        ["rgnames", "lb"],
        ["reference", "aligner", "indexes"],
        ["config", "algorithm", "aligner"],
        ["config", "algorithm", "expression_caller"],
        ["config", "algorithm", "quality_format"],
    ]
    prepare_step = s("prepare_sample", "multi-parallel",
                     prepare_inputs,
                     [cwlout("prep_rec", "record")],
                     "bcbio-rnaseq",
                     programs=["picard", "samtools", "pysam>=0.13.0"])
    trim_step = s("trim_sample", "multi-parallel",
                  [["prep_rec"]],
                  [cwlout("trim_rec", "record")],
                  "bcbio-rnaseq",
                  programs=["atropos;env=python3"])

    # --- alignment ---
    aligner_programs = ["star", "hisat2", "tophat", "samtools",
                        "sambamba", "seqtk"]
    align_step = s("process_alignment", "multi-parallel",
                   [["trim_rec"]],
                   [cwlout(["work_bam"], "File", [".bai"])],
                   "bcbio-rnaseq", aligner_programs,
                   {"files": 1.5})

    # --- quantitation ---
    quant_outputs = [
        cwlout(dd.get_keys("count_file"), "File"),
        cwlout(["quant", "tsv"], "File"),
        cwlout(["quant", "hdf5"], "File"),
    ]
    quant_programs = ["sailfish", "salmon", "kallisto>=0.43.1", "subread",
                      "gffread", "r=3.4.1", "r-wasabi"]
    quant_step = s("rnaseq_quantitate", "multi-parallel",
                   [["trim_rec"], ["work_bam"]],
                   quant_outputs,
                   "bcbio-rnaseq", programs=quant_programs,
                   disk={"files": 0.5})

    # --- quality control ---
    qc_rec_inputs = [
        ["work_bam"],
        ["analysis"],
        ["reference", "fasta", "base"],
        dd.get_keys("gtf_file"),
        ["genome_build"],
        ["config", "algorithm", "coverage_interval"],
        ["config", "algorithm", "tools_on"],
        ["config", "algorithm", "tools_off"],
        ["config", "algorithm", "qc"],
    ]
    qc_rec_step = s("qc_to_rec", "multi-combined",
                    qc_rec_inputs,
                    [cwlout("qc_rec", "record")],
                    "bcbio-rnaseq", disk={"files": 1.5}, cores=1,
                    no_files=True)

    # The qcout_rec record declares two explicit fields plus an "inherit"
    # entry.
    qcout_fields = [
        cwlout(["summary", "qc"], ["File", "null"]),
        cwlout(["summary", "metrics"], ["string", "null"]),
        cwlout("inherit"),
    ]
    summary_programs = ["bedtools", "fastqc", "goleft", "hts-nim-tools",
                        "mosdepth", "picard", "pythonpy", "qsignature",
                        "qualimap", "sambamba", "samtools"]
    summary_step = s("pipeline_summary", "multi-parallel",
                     [["qc_rec"]],
                     [cwlout("qcout_rec", "record", fields=qcout_fields)],
                     "bcbio-rnaseq", summary_programs)
    multiqc_step = s("multiqc_summary", "multi-combined",
                     [["qcout_rec"]],
                     [cwlout(["summary", "multiqc"], ["File", "null"])],
                     "bcbio-rnaseq", ["multiqc", "multiqc-bcbio"])

    steps = [prepare_step, trim_step, align_step, quant_step,
             qc_rec_step, summary_step, multiqc_step]
    final_outputs = [
        dd.get_keys("work_bam"),
        ["quant", "tsv"],
        ["summary", "multiqc"],
    ]
    return steps, final_outputs
예제 #4
0
def fastrnaseq():
    """Build the step list and final outputs for the fast RNA-seq workflow.

    Only the ``run_salmon_reads`` step is returned.  ``s``, ``cwlout`` and
    ``dd`` are defined outside this function.

    Returns:
        A ``(steps, final_outputs)`` tuple: a one-element list holding the
        ``run_salmon_reads`` step, and a one-element list holding the
        ``sailfish_dir`` key path.
    """
    # NOTE(review): the prep_samples step is constructed but never included
    # in the returned steps.  The call is kept because ``s`` is opaque from
    # here -- confirm whether it should be dropped or added to ``steps``.
    _unused_prep = [
        s("prep_samples", "multi-parallel",
          [["files"], dd.get_keys("sample_name")],
          [cwlout(["files"], "File")],
          programs=["picard"])
    ]

    salmon_inputs = [
        ["files"],
        dd.get_keys("sample_name"),
        dd.get_keys("gtf_file"),
        dd.get_keys("ref_file"),
        dd.get_keys("genome_build"),
    ]
    salmon_outputs = [cwlout(dd.get_keys("sailfish_dir"), "File")]
    salmon_step = s("run_salmon_reads", "multi-parallel",
                    salmon_inputs, salmon_outputs,
                    programs=["salmon"],
                    disk={"files": 1.5})

    steps = [salmon_step]
    final_outputs = [dd.get_keys('sailfish_dir')]
    return steps, final_outputs
예제 #5
0
def fastrnaseq(samples):
    """Return ``(steps, final_outputs)`` for the fast RNA-seq workflow.

    The returned step list contains only the ``run_salmon_reads`` step.
    ``s``, ``cwlout`` and ``dd`` come from the surrounding module.

    Args:
        samples: Not read by this function.

    Returns:
        tuple: a one-element list with the ``run_salmon_reads`` step, and a
        one-element list with the ``sailfish_dir`` key path.
    """
    # NOTE(review): this prep_samples step is built but not returned.  The
    # call is preserved because ``s`` cannot be inspected from here --
    # confirm whether it is intentionally excluded from ``steps``.
    _unused_prep = [
        s("prep_samples", "multi-parallel",
          [["files"], dd.get_keys("sample_name")],
          [cwlout(["files"], "File")],
          "bcbio", programs=["picard"])
    ]

    read_quant_keys = [
        ["files"],
        dd.get_keys("sample_name"),
        dd.get_keys("gtf_file"),
        dd.get_keys("ref_file"),
        dd.get_keys("genome_build"),
    ]
    read_quant_results = [cwlout(dd.get_keys("sailfish_dir"), "File")]
    read_quant_step = s("run_salmon_reads", "multi-parallel",
                        read_quant_keys, read_quant_results,
                        "bcbio", programs=["salmon"],
                        disk={"files": 1.5})

    steps = [read_quant_step]
    final_outputs = [dd.get_keys('sailfish_dir')]
    return steps, final_outputs
예제 #6
0
def rnaseq(samples):
    """Build the checkpoint-aware RNA-seq workflow definition.

    ``_rnaseq_checkpoints(samples)`` is consulted first.  When its ``"vc"``
    entry is truthy, extra steps and outputs from
    ``_postprocess_alignment`` are spliced in after alignment.  Steps and
    outputs from ``_variant_vc`` are always requested.  All helpers (``s``,
    ``cwlout``, ``dd`` and the underscore-prefixed functions) are defined
    outside this function.

    Args:
        samples: Passed unchanged to ``_rnaseq_checkpoints``.

    Returns:
        A ``(steps, final_outputs)`` tuple.  ``steps`` concatenates, in
        order: preparation, alignment, optional alignment post-processing,
        quantitation, QC, the steps from ``_variant_vc``, and fusion
        detection.  ``final_outputs`` is four fixed key paths followed by
        the ``_variant_vc`` and post-processing outputs.
    """
    checkpoints = _rnaseq_checkpoints(samples)

    # --- preparation and trimming ---
    prepare_inputs = [
        ["files"],
        dd.get_keys("sample_name"),
        dd.get_keys("ref_file"),
        dd.get_keys("genome_build"),
        dd.get_keys("gtf_file"),
        ["analysis"],
        ["rgnames", "pl"],
        ["rgnames", "pu"],
        ["rgnames", "lane"],
        ["rgnames", "rg"],
        ["rgnames", "lb"],
        ["reference", "aligner", "indexes"],
        ["config", "algorithm", "aligner"],
        ["config", "algorithm", "expression_caller"],
        ["config", "algorithm", "fusion_caller"],
        ["config", "algorithm", "quality_format"],
    ]
    prepare_step = s("prepare_sample", "multi-parallel",
                     prepare_inputs,
                     [cwlout("prep_rec", "record")],
                     "bcbio-rnaseq",
                     programs=["picard", "samtools", "pysam>=0.13.0"])
    trim_step = s("trim_sample", "multi-parallel",
                  [["prep_rec"]],
                  [cwlout("trim_rec", "record")],
                  "bcbio-rnaseq",
                  programs=["atropos;env=python3"])
    prep_steps = [prepare_step, trim_step]

    # --- alignment ---
    aligner_programs = ["star", "hisat2", "tophat;env=python2", "samtools",
                        "sambamba", "seqtk"]
    align_steps = [
        s("process_alignment", "multi-parallel",
          [["trim_rec"]],
          [cwlout(["align_bam"], "File", [".bai"])],
          "bcbio-rnaseq", aligner_programs,
          {"files": 1.5})
    ]

    # Alignment post-processing is only requested when the "vc" checkpoint
    # is set; otherwise it contributes nothing.
    pp_align, pp_align_out = (
        _postprocess_alignment(checkpoints) if checkpoints.get("vc")
        else ([], []))

    # --- quantitation ---
    quant_outputs = [
        cwlout(dd.get_keys("count_file"), "File"),
        cwlout(["quant", "tsv"], "File"),
        cwlout(["quant", "hdf5"], "File"),
        cwlout(["quant", "fusion"], "File"),
    ]
    quant_programs = ["sailfish", "salmon", "kallisto>=0.43.1", "subread",
                      "gffread", "r=3.4.1", "r-base=3.4.1=h4fe35fd_8",
                      "xorg-libxt", "r-wasabi"]
    quant_steps = [
        s("rnaseq_quantitate", "multi-parallel",
          [["trim_rec"], ["align_bam"]],
          quant_outputs,
          "bcbio-rnaseq",
          programs=quant_programs,
          disk={"files": 0.5})
    ]

    # --- quality control ---
    qc_rec_inputs = [
        ["align_bam"],
        ["analysis"],
        ["reference", "fasta", "base"],
        dd.get_keys("gtf_file"),
        ["genome_build"],
        ["config", "algorithm", "coverage_interval"],
        ["config", "algorithm", "tools_on"],
        ["config", "algorithm", "tools_off"],
        ["config", "algorithm", "qc"],
    ]
    qc_rec_step = s("qc_to_rec", "multi-combined",
                    qc_rec_inputs,
                    [cwlout("qc_rec", "record")],
                    "bcbio-rnaseq",
                    disk={"files": 1.5}, cores=1, no_files=True)

    qcout_fields = [
        cwlout(["summary", "qc"], ["File", "null"]),
        cwlout(["summary", "metrics"], ["string", "null"]),
        cwlout("inherit"),
    ]
    summary_programs = ["bedtools", "fastqc=0.11.7=5", "goleft",
                        "hts-nim-tools", "mosdepth", "picard", "pythonpy",
                        "qsignature", "qualimap", "sambamba", "samtools"]
    summary_step = s("pipeline_summary", "multi-parallel",
                     [["qc_rec"]],
                     [cwlout("qcout_rec", "record", fields=qcout_fields)],
                     "bcbio-rnaseq", summary_programs)
    multiqc_step = s("multiqc_summary", "multi-combined",
                     [["qcout_rec"]],
                     [cwlout(["summary", "multiqc"], ["File", "null"])],
                     "bcbio-rnaseq", ["multiqc", "multiqc-bcbio"])
    qc_steps = [qc_rec_step, summary_step, multiqc_step]

    vc, vc_out = _variant_vc(checkpoints)

    # --- fusion detection ---
    fusion_outputs = [
        cwlout(["fusion", "fasta"], "File"),
        cwlout(["fusion", "json"], "File"),
    ]
    fusion_steps = [
        s("detect_fusions", "multi-parallel",
          [["quant", "fusion"], ["quant", "hdf5"], ["trim_rec"]],
          fusion_outputs,
          "bcbio-rnaseq", ["pizzly"])
    ]

    steps = (prep_steps + align_steps + pp_align + quant_steps + qc_steps
             + vc + fusion_steps)
    fixed_outputs = [
        ["rgnames", "sample"],
        dd.get_keys("align_bam"),
        ["quant", "tsv"],
        ["summary", "multiqc"],
    ]
    final_outputs = fixed_outputs + vc_out + pp_align_out
    return steps, final_outputs
예제 #7
0
def rnaseq(samples):
    """Return ``(steps, final_outputs)`` for RNA-seq with optional extras.

    The checkpoints returned by ``_rnaseq_checkpoints(samples)`` decide
    whether ``_postprocess_alignment`` contributes steps and outputs (only
    when ``checkpoints.get("vc")`` is truthy).  ``_variant_vc`` is always
    called with the checkpoints.  ``s``, ``cwlout``, ``dd`` and the
    underscore-prefixed helpers live outside this function.

    Args:
        samples: Forwarded to ``_rnaseq_checkpoints``.

    Returns:
        tuple: ``steps`` is the concatenation of the preparation, alignment,
        post-alignment, quantitation, QC, ``_variant_vc`` and fusion step
        lists, in that order.  ``final_outputs`` is a list of four key
        paths extended with the ``_variant_vc`` and post-alignment outputs.
    """
    checkpoints = _rnaseq_checkpoints(samples)

    # Preparation: build the prep record, then trim it.
    sample_keys = [
        ["files"],
        dd.get_keys("sample_name"),
        dd.get_keys("ref_file"),
        dd.get_keys("genome_build"),
        dd.get_keys("gtf_file"),
        ["analysis"],
        ["rgnames", "pl"],
        ["rgnames", "pu"],
        ["rgnames", "lane"],
        ["rgnames", "rg"],
        ["rgnames", "lb"],
        ["reference", "aligner", "indexes"],
        ["config", "algorithm", "aligner"],
        ["config", "algorithm", "expression_caller"],
        ["config", "algorithm", "fusion_caller"],
        ["config", "algorithm", "quality_format"],
    ]
    preparation = [
        s("prepare_sample", "multi-parallel",
          sample_keys,
          [cwlout("prep_rec", "record")],
          "bcbio-rnaseq", programs=["picard", "samtools", "pysam>=0.13.0"]),
        s("trim_sample", "multi-parallel",
          [["prep_rec"]],
          [cwlout("trim_rec", "record")],
          "bcbio-rnaseq", programs=["atropos;env=python3"]),
    ]

    # Alignment.
    alignment = [
        s("process_alignment", "multi-parallel",
          [["trim_rec"]],
          [cwlout(["align_bam"], "File", [".bai"])],
          "bcbio-rnaseq",
          ["star", "hisat2", "tophat;env=python2", "samtools",
           "sambamba", "seqtk"],
          {"files": 1.5}),
    ]

    # Default to no post-alignment work; override when "vc" is enabled.
    pp_align, pp_align_out = [], []
    if checkpoints.get("vc"):
        pp_align, pp_align_out = _postprocess_alignment(checkpoints)

    # Quantitation.
    quantitation_results = [
        cwlout(dd.get_keys("count_file"), "File"),
        cwlout(["quant", "tsv"], "File"),
        cwlout(["quant", "hdf5"], "File"),
        cwlout(["quant", "fusion"], "File"),
    ]
    quantitation = [
        s("rnaseq_quantitate", "multi-parallel",
          [["trim_rec"], ["align_bam"]],
          quantitation_results,
          "bcbio-rnaseq",
          programs=["sailfish", "salmon", "kallisto>=0.43.1", "subread",
                    "gffread", "r=3.4.1", "r-base=3.4.1=h4fe35fd_8",
                    "xorg-libxt", "r-wasabi"],
          disk={"files": 0.5}),
    ]

    # Quality control: qc_rec -> qcout_rec -> MultiQC summary.
    qc_keys = [
        ["align_bam"],
        ["analysis"],
        ["reference", "fasta", "base"],
        dd.get_keys("gtf_file"),
        ["genome_build"],
        ["config", "algorithm", "coverage_interval"],
        ["config", "algorithm", "tools_on"],
        ["config", "algorithm", "tools_off"],
        ["config", "algorithm", "qc"],
    ]
    to_rec_step = s("qc_to_rec", "multi-combined",
                    qc_keys,
                    [cwlout("qc_rec", "record")],
                    "bcbio-rnaseq", disk={"files": 1.5}, cores=1,
                    no_files=True)
    record_fields = [
        cwlout(["summary", "qc"], ["File", "null"]),
        cwlout(["summary", "metrics"], ["string", "null"]),
        cwlout("inherit"),
    ]
    per_sample_step = s("pipeline_summary", "multi-parallel",
                        [["qc_rec"]],
                        [cwlout("qcout_rec", "record", fields=record_fields)],
                        "bcbio-rnaseq",
                        ["bedtools", "fastqc=0.11.7=5", "goleft",
                         "hts-nim-tools", "mosdepth", "picard", "pythonpy",
                         "qsignature", "qualimap", "sambamba", "samtools"])
    combined_step = s("multiqc_summary", "multi-combined",
                      [["qcout_rec"]],
                      [cwlout(["summary", "multiqc"], ["File", "null"])],
                      "bcbio-rnaseq", ["multiqc", "multiqc-bcbio"])
    quality_control = [to_rec_step, per_sample_step, combined_step]

    vc, vc_out = _variant_vc(checkpoints)

    # Fusion detection.
    fusion_results = [
        cwlout(["fusion", "fasta"], "File"),
        cwlout(["fusion", "json"], "File"),
    ]
    fusion_detection = [
        s("detect_fusions", "multi-parallel",
          [["quant", "fusion"], ["quant", "hdf5"], ["trim_rec"]],
          fusion_results,
          "bcbio-rnaseq", ["pizzly"]),
    ]

    steps = (preparation + alignment + pp_align + quantitation
             + quality_control + vc + fusion_detection)
    core_outputs = [
        ["rgnames", "sample"],
        dd.get_keys("align_bam"),
        ["quant", "tsv"],
        ["summary", "multiqc"],
    ]
    final_outputs = core_outputs + vc_out + pp_align_out
    return steps, final_outputs