Example #1
0
def standardpipeline(config, run_info_yaml, parallel, dirs, samples):
    ## Alignment and preparation requiring the entire input file (multicore cluster)
    with prun.start(_wres(parallel, ["aligner", "samtools", "sambamba"]),
                    samples, config, dirs, "multicore") as run_parallel:
        with profile.report("organize samples", dirs):
            samples = run_parallel("organize_samples", [[
                dirs, config, run_info_yaml,
                [x[0]["description"] for x in samples]
            ]])
        with profile.report("alignment", dirs):
            samples = run_parallel("process_alignment", samples)
        with profile.report("callable regions", dirs):
            samples = run_parallel("prep_samples", [samples])
            samples = run_parallel("postprocess_alignment", samples)
            samples = run_parallel("combine_sample_regions", [samples])
            samples = region.clean_sample_data(samples)
    ## Quality control
    with prun.start(
            _wres(parallel,
                  ["fastqc", "qsignature", "kraken", "gatk", "samtools"]),
            samples, config, dirs, "multicore2") as run_parallel:
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])
    logger.info("Timing: finished")
    return samples
Example #2
0
 def run(self, config, run_info_yaml, parallel, dirs, samples):
     with prun.start(_wres(parallel, ["aligner"],
                           ensure_mem={"tophat": 8, "tophat2": 8, "star": 2}),
                     [samples[0]], config, dirs, "organize_samples") as run_parallel:
         with profile.report("organize samples", dirs):
             samples = run_parallel("organize_samples", [[dirs, config, run_info_yaml,
                                                          [x[0]["description"] for x in samples]]])
     with prun.start(_wres(parallel, ["picard", "cutadapt"]),
                     samples, config, dirs, "trimming") as run_parallel:
         with profile.report("adapter trimming", dirs):
             samples = run_parallel("prepare_sample", samples)
             samples = run_parallel("trim_sample", samples)
     with prun.start(_wres(parallel, ["aligner", "picard"],
                           ensure_mem={"tophat": 8, "tophat2": 8, "star": 2}),
                     samples, config, dirs, "alignment",
                     multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
         with profile.report("alignment", dirs):
             samples = run_parallel("disambiguate_split", [samples])
             samples = run_parallel("process_alignment", samples)
     with prun.start(_wres(parallel, ["samtools", "cufflinks"]),
                     samples, config, dirs, "rnaseqcount") as run_parallel:
         with profile.report("disambiguation", dirs):
             samples = disambiguate.resolve(samples, run_parallel)
         with profile.report("transcript assembly", dirs):
             samples = rnaseq.assemble_transcripts(run_parallel, samples)
         with profile.report("estimate expression", dirs):
             samples = rnaseq.estimate_expression(samples, run_parallel)
     with prun.start(_wres(parallel, ["picard", "fastqc", "rnaseqc", "kraken"]),
                     samples, config, dirs, "qc") as run_parallel:
         with profile.report("quality control", dirs):
             samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Example #3
0
 def run(self, config, run_info_yaml, parallel, dirs, samples):
     with prun.start(_wres(parallel, ["aligner"],
                           ensure_mem={"tophat": 8, "tophat2": 8, "star": 2}),
                     [samples[0]], config, dirs, "organize_samples") as run_parallel:
         with profile.report("organize samples", dirs):
             samples = run_parallel("organize_samples", [[dirs, config, run_info_yaml,
                                                          [x[0]["description"] for x in samples]]])
     with prun.start(_wres(parallel, ["picard", "cutadapt"]),
                     samples, config, dirs, "trimming") as run_parallel:
         with profile.report("adapter trimming", dirs):
             samples = run_parallel("prepare_sample", samples)
             samples = run_parallel("trim_sample", samples)
     with prun.start(_wres(parallel, ["aligner", "picard"],
                           ensure_mem={"tophat": 8, "tophat2": 8, "star": 2}),
                     samples, config, dirs, "alignment",
                     multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
         with profile.report("alignment", dirs):
             samples = disambiguate.split(samples)
             samples = run_parallel("process_alignment", samples)
     with prun.start(_wres(parallel, ["samtools", "cufflinks"]),
                     samples, config, dirs, "rnaseqcount") as run_parallel:
         with profile.report("disambiguation", dirs):
             samples = disambiguate.resolve(samples, run_parallel)
         with profile.report("transcript assembly", dirs):
             samples = rnaseq.assemble_transcripts(run_parallel, samples)
         with profile.report("estimate expression", dirs):
             samples = rnaseq.estimate_expression(samples, run_parallel)
     with prun.start(_wres(parallel, ["picard", "fastqc", "rnaseqc", "kraken"]),
                     samples, config, dirs, "qc") as run_parallel:
         with profile.report("quality control", dirs):
             samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Example #4
0
    def run(self, config, config_file, parallel, dirs, samples):
        with prun.start(_wres(parallel, ["picard"]),
                        samples, config, dirs, "trimming") as run_parallel:
            samples = run_parallel("process_lane", samples)
            samples = run_parallel("trim_lane", samples)
        with prun.start(_wres(parallel, ["aligner"],
                              ensure_mem={"tophat": 8, "tophat2": 8, "star": 30}),
                        samples, config, dirs, "multicore",
                        multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = disambiguate.resolve(samples, run_parallel)

        with prun.start(_wres(parallel, ["samtools", "cufflinks"]),
                        samples, config, dirs, "rnaseqcount") as run_parallel:
            samples = rnaseq.estimate_expression(samples, run_parallel)
            #samples = rnaseq.detect_fusion(samples, run_parallel)

        combined = combine_count_files([x[0].get("count_file") for x in samples])
        gtf_file = utils.get_in(samples[0][0], ('genome_resources', 'rnaseq',
                                                'transcripts'), None)
        annotated = annotate_combined_count_file(combined, gtf_file)
        for x in samples:
            x[0]["combined_counts"] = combined
            if annotated:
                x[0]["annotated_combined_counts"] = annotated

        with prun.start(_wres(parallel, ["picard", "fastqc", "rnaseqc"]),
                        samples, config, dirs, "persample") as run_parallel:
            samples = qcsummary.generate_parallel(samples, run_parallel)
        return samples
Example #5
0
    def run(self, config, config_file, parallel, dirs, samples):
        with prun.start(parallel, samples, config, dirs, "trimming") as run_parallel:
            samples = run_parallel("trim_lane", samples)
        with prun.start(
            _wprogs(parallel, ["aligner"], {"tophat": 8, "tophat2": 8, "star": 30}),
            samples,
            config,
            dirs,
            "multicore",
            multiplier=alignprep.parallel_multiplier(samples),
        ) as run_parallel:
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = disambiguate.resolve(samples, run_parallel)

        with prun.start(
            _wprogs(parallel, ["samtools", "gatk", "cufflinks"]), samples, config, dirs, "rnaseqcount"
        ) as run_parallel:
            samples = rnaseq.estimate_expression(samples, run_parallel)
            # samples = rnaseq.detect_fusion(samples, run_parallel)

        combined = combine_count_files([x[0].get("count_file") for x in samples])
        organism = utils.get_in(samples[0][0], ("genome_resources", "aliases", "ensembl"), None)
        annotated = annotate_combined_count_file(combined, organism)
        for x in samples:
            x[0]["combined_counts"] = combined
            x[0]["annotated_combined_counts"] = annotated

        with prun.start(
            _wprogs(parallel, ["picard", "fastqc", "rnaseqc"]), samples, config, dirs, "persample"
        ) as run_parallel:
            samples = qcsummary.generate_parallel(samples, run_parallel)
        return samples
Example #6
0
 def run(self, config, config_file, parallel, dirs, lane_items):
     ## Alignment and preparation requiring the entire input file (multicore cluster)
     with prun.start(_wres(parallel, ["aligner"]),
                     lane_items, config, dirs, "multicore") as run_parallel:
         with profile.report("alignment", dirs):
             samples = run_parallel("process_alignment", lane_items)
         with profile.report("callable regions", dirs):
             samples = run_parallel("postprocess_alignment", samples)
             regions = run_parallel("combine_sample_regions", [samples])[0]
             samples = region.add_region_info(samples, regions)
             samples = region.clean_sample_data(samples)
     ## Processing on sub regions
     with prun.start(_wres(parallel, ["gatk", "picard", "samtools"]),
                     samples, config, dirs, "full",
                     multiplier=len(regions["analysis"]), max_multicore=1) as run_parallel:
         with profile.report("alignment post-processing", dirs):
             samples = region.parallel_prep_region(samples, regions, run_parallel)
             samples = region.parallel_variantcall_region(samples, run_parallel)
     print len(samples)
     ## Finalize BAMs and QC
     with prun.start(_wres(parallel, ["fastqc", "bamtools", "samtools"]),
                     samples, config, dirs, "multicore2") as run_parallel:
         with profile.report("prepped BAM merging", dirs):
             samples = region.delayed_bamprep_merge(samples, run_parallel)
         print len(samples)
         with profile.report("quality control", dirs):
             samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Example #7
0
    def run(self, config, config_file, parallel, dirs, samples):
        with prun.start(_wres(parallel, ["picard", "AlienTrimmer"]),
                        samples, config, dirs, "trimming") as run_parallel:
            with profile.report("adapter trimming", dirs):
                samples = run_parallel("prepare_sample", samples)
                samples = run_parallel("trim_sample", samples)
        with prun.start(_wres(parallel, ["aligner", "picard"],
                              ensure_mem={"tophat": 8, "tophat2": 8, "star": 40}),
                        samples, config, dirs, "multicore",
                        multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
            with profile.report("alignment", dirs):
                samples = disambiguate.split(samples)
                samples = run_parallel("process_alignment", samples)
        with prun.start(_wres(parallel, ["samtools", "cufflinks"]),
                        samples, config, dirs, "rnaseqcount") as run_parallel:
            with profile.report("disambiguation", dirs):
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("transcript assembly", dirs):
                samples = rnaseq.assemble_transcripts(run_parallel, samples)
            with profile.report("estimate expression", dirs):
                samples = rnaseq.estimate_expression(samples, run_parallel)

        with prun.start(_wres(parallel, ["picard", "fastqc", "rnaseqc","kraken"]),
                        samples, config, dirs, "persample") as run_parallel:
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
        
        logger.info("Timing: finished")
        return samples
Example #8
0
    def run(self, config, config_file, parallel, dirs, samples):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with prun.start(_wres(parallel, ["aligner", "samtools", "sambamba"],
                              (["reference", "fasta"], ["reference", "aligner"], ["files"])),
                        samples, config, dirs, "multicore",
                        multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
            with profile.report("alignment preparation", dirs):
                samples = run_parallel("prep_align_inputs", samples)
                samples = disambiguate.split(samples)
            with profile.report("alignment", dirs):
                samples = run_parallel("process_alignment", samples)
                samples = alignprep.merge_split_alignments(samples, run_parallel)
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("callable regions", dirs):
                samples = run_parallel("postprocess_alignment", samples)
                samples = run_parallel("combine_sample_regions", [samples])
                samples = region.clean_sample_data(samples)
            with profile.report("coverage", dirs):
                samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with prun.start(_wres(parallel, ["gatk", "picard", "variantcaller"]),
                        samples, config, dirs, "full",
                        multiplier=region.get_max_counts(samples), max_multicore=1) as run_parallel:
            with profile.report("alignment post-processing", dirs):
                samples = region.parallel_prep_region(samples, run_parallel)
            with profile.report("variant calling", dirs):
                samples = genotype.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with prun.start(_wres(parallel, ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"]),
                        samples, config, dirs, "persample") as run_parallel:
            with profile.report("joint squaring off/backfilling", dirs):
                samples = joint.square_off(samples, run_parallel)
            with profile.report("variant post-processing", dirs):
                samples = run_parallel("postprocess_variants", samples)
                samples = run_parallel("split_variants_by_sample", samples)
            with profile.report("validation", dirs):
                samples = run_parallel("compare_to_rm", samples)
                samples = genotype.combine_multiple_callers(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with prun.start(_wres(parallel, ["gemini", "samtools", "fastqc", "bamtools", "bcbio_variation",
                                         "bcbio-variation-recall"]),
                        samples, config, dirs, "multicore2") as run_parallel:
            with profile.report("prepped BAM merging", dirs):
                samples = region.delayed_bamprep_merge(samples, run_parallel)
            with profile.report("ensemble calling", dirs):
                samples = ensemble.combine_calls_parallel(samples, run_parallel)
            with profile.report("validation summary", dirs):
                samples = validate.summarize_grading(samples)
            with profile.report("structural variation", dirs):
                samples = structural.run(samples, run_parallel)
            with profile.report("population database", dirs):
                samples = population.prep_db_parallel(samples, run_parallel)
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
            with profile.report("archive", dirs):
                samples = archive.compress(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Example #9
0
 def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
     lane_items = run_parallel("trim_lane", lane_items)
     samples = run_parallel("process_alignment", lane_items)
     samples = run_parallel("generate_transcript_counts", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     #run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     return samples
Example #10
0
def chipseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    with prun.start(_wres(parallel, ["aligner", "picard"]),
                    samples, config, dirs, "multicore",
                    multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("organize samples", dirs):
            samples = run_parallel("organize_samples", [[dirs, config, run_info_yaml,
                                                            [x[0]["description"] for x in samples]]])
        with profile.report("alignment", dirs):
            samples = run_parallel("prepare_sample", samples)
            samples = run_parallel("trim_sample", samples)
            samples = run_parallel("disambiguate_split", [samples])
            samples = run_parallel("process_alignment", samples)

        with profile.report("disambiguation", dirs):
            samples = disambiguate.resolve(samples, run_parallel)
            samples = run_parallel("clean_chipseq_alignment", samples)

    with prun.start(_wres(parallel, ["peakcaller"]),
                    samples, config, dirs, "peakcalling",
                    multiplier = peaks._get_multiplier(samples)) as run_parallel:
        with profile.report("peakcalling", dirs):
            samples = peaks.peakcall_prepare(samples, run_parallel)

    with prun.start(_wres(parallel, ["picard", "fastqc"]),
                    samples, config, dirs, "qc") as run_parallel:
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])
    logger.info("Timing: finished")
    return samples
Example #11
0
 def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
     lane_items = run_parallel("trim_lane", lane_items)
     samples = disambiguate.split(lane_items)
     samples = run_parallel("process_alignment", samples)
     samples = run_parallel("clean_chipseq_alignment", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     return samples
Example #12
0
 def run(self, config, run_info_yaml, parallel, dirs, samples):
     with prun.start(_wres(parallel, ["aligner", "picard"]),
                     samples,
                     config,
                     dirs,
                     "multicore",
                     multiplier=alignprep.parallel_multiplier(
                         samples)) as run_parallel:
         with profile.report("organize samples", dirs):
             samples = run_parallel("organize_samples", [[
                 dirs, config, run_info_yaml,
                 [x[0]["description"] for x in samples]
             ]])
         samples = run_parallel("prepare_sample", samples)
         samples = run_parallel("trim_sample", samples)
         samples = run_parallel("disambiguate_split", [samples])
         samples = run_parallel("process_alignment", samples)
     with prun.start(_wres(parallel, ["picard", "fastqc"]), samples, config,
                     dirs, "persample") as run_parallel:
         with profile.report("disambiguation", dirs):
             samples = disambiguate.resolve(samples, run_parallel)
         samples = run_parallel("clean_chipseq_alignment", samples)
         samples = qcsummary.generate_parallel(samples, run_parallel)
         with profile.report("upload", dirs):
             samples = run_parallel("upload_samples", samples)
             for sample in samples:
                 run_parallel("upload_samples_project", [sample])
     return samples
Example #13
0
def chipseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    with prun.start(_wres(parallel, ["aligner", "picard"]),
                    samples, config, dirs, "multicore",
                    multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("organize samples", dirs):
            samples = run_parallel("organize_samples", [[dirs, config, run_info_yaml,
                                                            [x[0]["description"] for x in samples]]])
        with profile.report("alignment", dirs):
            samples = run_parallel("prepare_sample", samples)
            samples = run_parallel("trim_sample", samples)
            samples = run_parallel("disambiguate_split", [samples])
            samples = run_parallel("process_alignment", samples)

        with profile.report("disambiguation", dirs):
            samples = disambiguate.resolve(samples, run_parallel)
            samples = run_parallel("clean_chipseq_alignment", samples)

    with prun.start(_wres(parallel, ["peakcaller"]),
                    samples, config, dirs, "peakcalling",
                    multiplier = peaks._get_multiplier(samples)) as run_parallel:
        with profile.report("peakcalling", dirs):
            samples = peaks.peakcall_prepare(samples, run_parallel)

    with prun.start(_wres(parallel, ["picard", "fastqc"]),
                    samples, config, dirs, "qc") as run_parallel:
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])
    return samples
Example #14
0
 def run(self, config, run_info_yaml, parallel, dirs, samples):
     ## Alignment and preparation requiring the entire input file (multicore cluster)
     with prun.start(_wres(parallel, ["aligner"]), samples, config, dirs, "multicore") as run_parallel:
         with profile.report("organize samples", dirs):
             samples = run_parallel(
                 "organize_samples", [[dirs, config, run_info_yaml, [x[0]["description"] for x in samples]]]
             )
         with profile.report("alignment", dirs):
             samples = run_parallel("process_alignment", samples)
         with profile.report("callable regions", dirs):
             samples = run_parallel("prep_samples", [samples])
             samples = run_parallel("postprocess_alignment", samples)
             samples = run_parallel("combine_sample_regions", [samples])
             samples = region.clean_sample_data(samples)
     ## Quality control
     with prun.start(
         _wres(parallel, ["fastqc", "bamtools", "samtools", "qsignature", "kraken"]),
         samples,
         config,
         dirs,
         "multicore2",
     ) as run_parallel:
         with profile.report("quality control", dirs):
             samples = qcsummary.generate_parallel(samples, run_parallel)
         with profile.report("upload", dirs):
             for sample in samples:
                 run_parallel("upload_samples", [sample])
     logger.info("Timing: finished")
     return samples
Example #15
0
 def run(self, config, run_info_yaml, parallel, dirs, samples):
     with prun.start(
         _wres(parallel, ["aligner", "picard"]),
         samples,
         config,
         dirs,
         "multicore",
         multiplier=alignprep.parallel_multiplier(samples),
     ) as run_parallel:
         with profile.report("organize samples", dirs):
             samples = run_parallel(
                 "organize_samples", [[dirs, config, run_info_yaml, [x[0]["description"] for x in samples]]]
             )
         samples = run_parallel("prepare_sample", samples)
         samples = run_parallel("trim_sample", samples)
         samples = run_parallel("disambiguate_split", [samples])
         samples = run_parallel("process_alignment", samples)
     with prun.start(_wres(parallel, ["picard", "fastqc"]), samples, config, dirs, "persample") as run_parallel:
         with profile.report("disambiguation", dirs):
             samples = disambiguate.resolve(samples, run_parallel)
         samples = run_parallel("clean_chipseq_alignment", samples)
         samples = qcsummary.generate_parallel(samples, run_parallel)
         with profile.report("upload", dirs):
             for sample in samples:
                 run_parallel("upload_samples", [sample])
     return samples
Example #16
0
def rnaseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    samples = rnaseq_prep_samples(config, run_info_yaml, parallel, dirs,
                                  samples)
    with prun.start(
            _wres(parallel, ["aligner", "picard", "samtools"],
                  ensure_mem={
                      "tophat": 10,
                      "tophat2": 10,
                      "star": 2,
                      "hisat2": 8
                  }),
            samples,
            config,
            dirs,
            "alignment",
            multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("alignment", dirs):
            samples = run_parallel("disambiguate_split", [samples])
            samples = run_parallel("process_alignment", samples)
    with prun.start(_wres(parallel, ["samtools", "cufflinks"]), samples,
                    config, dirs, "rnaseqcount") as run_parallel:
        with profile.report("disambiguation", dirs):
            samples = disambiguate.resolve(samples, run_parallel)
        with profile.report("transcript assembly", dirs):
            samples = rnaseq.assemble_transcripts(run_parallel, samples)
        with profile.report("estimate expression (threaded)", dirs):
            samples = rnaseq.quantitate_expression_parallel(
                samples, run_parallel)

    with prun.start(_wres(parallel, ["dexseq", "express"]),
                    samples,
                    config,
                    dirs,
                    "rnaseqcount-singlethread",
                    max_multicore=1) as run_parallel:
        with profile.report("estimate expression (single threaded)", dirs):
            samples = rnaseq.quantitate_expression_noparallel(
                samples, run_parallel)

    samples = rnaseq.combine_files(samples)
    with prun.start(_wres(parallel, ["gatk", "vardict"]), samples, config,
                    dirs, "rnaseq-variation") as run_parallel:
        with profile.report("RNA-seq variant calling", dirs):
            samples = rnaseq.rnaseq_variant_calling(samples, run_parallel)

    with prun.start(
            _wres(
                parallel,
                ["samtools", "fastqc", "qualimap", "kraken", "gatk", "preseq"],
                ensure_mem={"qualimap": 4}), samples, config, dirs,
            "qc") as run_parallel:
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])
    logger.info("Timing: finished")
    return samples
Example #17
0
def smallrnaseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    # causes a circular import at the top level
    from bcbio.srna.group import report as srna_report

    samples = rnaseq_prep_samples(config, run_info_yaml, parallel, dirs,
                                  samples)

    with prun.start(
            _wres(parallel, ["aligner", "picard", "samtools"],
                  ensure_mem={
                      "bowtie": 8,
                      "bowtie2": 8,
                      "star": 2
                  }), [samples[0]], config, dirs, "alignment") as run_parallel:
        with profile.report("prepare", dirs):
            samples = run_parallel("seqcluster_prepare", [samples])
        with profile.report("seqcluster alignment", dirs):
            samples = run_parallel("srna_alignment", [samples])

    with prun.start(
            _wres(parallel, ["aligner", "picard", "samtools"],
                  ensure_mem={
                      "tophat": 10,
                      "tophat2": 10,
                      "star": 2,
                      "hisat2": 8
                  }),
            samples,
            config,
            dirs,
            "alignment_samples",
            multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("alignment", dirs):
            samples = run_parallel("process_alignment", samples)

    with prun.start(_wres(parallel, ["picard", "miraligner"]), samples, config,
                    dirs, "annotation") as run_parallel:
        with profile.report("small RNA annotation", dirs):
            samples = run_parallel("srna_annotation", samples)

    with prun.start(
            _wres(parallel, ["seqcluster", "mirge"],
                  ensure_mem={"seqcluster": 8}), [samples[0]], config, dirs,
            "cluster") as run_parallel:
        with profile.report("cluster", dirs):
            samples = run_parallel("seqcluster_cluster", [samples])

    with prun.start(_wres(parallel, ["picard", "fastqc"]), samples, config,
                    dirs, "qc") as run_parallel:
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("report", dirs):
            srna_report(samples)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])

    return samples
Example #18
0
    def run(self, config, config_file, parallel, dirs, samples):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with prun.start(_wres(parallel, ["aligner", "samtools", "sambamba"],
                              (["reference", "fasta"], ["reference", "aligner"], ["files"])),
                        samples, config, dirs, "multicore",
                        multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
            with profile.report("alignment preparation", dirs):
                samples = run_parallel("prep_align_inputs", samples)
                samples = disambiguate.split(samples)
            with profile.report("alignment", dirs):
                samples = run_parallel("process_alignment", samples)
                samples = alignprep.merge_split_alignments(samples, run_parallel)
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("callable regions", dirs):
                samples = run_parallel("postprocess_alignment", samples)
                samples = run_parallel("combine_sample_regions", [samples])
                samples = region.clean_sample_data(samples)
            with profile.report("coverage", dirs):
                samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with prun.start(_wres(parallel, ["gatk", "picard", "variantcaller"]),
                        samples, config, dirs, "full",
                        multiplier=region.get_max_counts(samples), max_multicore=1) as run_parallel:
            with profile.report("alignment post-processing", dirs):
                samples = region.parallel_prep_region(samples, run_parallel)
            with profile.report("variant calling", dirs):
                samples = genotype.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with prun.start(_wres(parallel, ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"]),
                        samples, config, dirs, "persample") as run_parallel:
            with profile.report("variant post-processing", dirs):
                samples = run_parallel("postprocess_variants", samples)
                samples = run_parallel("split_variants_by_sample", samples)
            with profile.report("validation", dirs):
                samples = run_parallel("compare_to_rm", samples)
                samples = genotype.combine_multiple_callers(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with prun.start(_wres(parallel, ["gemini", "samtools", "fastqc", "bamtools", "bcbio_variation",
                                         "bcbio-variation-recall"]),
                        samples, config, dirs, "multicore2") as run_parallel:
            with profile.report("prepped BAM merging", dirs):
                samples = region.delayed_bamprep_merge(samples, run_parallel)
            with profile.report("ensemble calling", dirs):
                samples = ensemble.combine_calls_parallel(samples, run_parallel)
            with profile.report("validation summary", dirs):
                samples = validate.summarize_grading(samples)
            with profile.report("structural variation", dirs):
                samples = structural.run(samples, run_parallel)
            with profile.report("population database", dirs):
                samples = population.prep_db_parallel(samples, run_parallel)
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
            with profile.report("archive", dirs):
                samples = archive.compress(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Example #19
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("generate_transcript_counts", samples)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     samples = qcsummary.generate_parallel(samples, run_parallel)
     return samples
Example #20
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("generate_transcript_counts", samples)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     samples = qcsummary.generate_parallel(samples, run_parallel)
     return samples
Example #21
0
    def run(self, config, config_file, run_parallel, parallel, dirs,
            lane_items):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["align_prep_full"],
                             lane_items, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel(
                "align_prep_full",
                [list(x) + [config_file] for x in lane_items])
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with global_parallel(parallel,
                             "full", ["piped_bamprep", "variantcall_sample"],
                             samples,
                             dirs,
                             config,
                             multiplier=len(regions["analysis"])) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions,
                                                  run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        ## Finalizing BAMs and population databases, handle multicore computation
        with global_parallel(parallel, "multicore2",
                             ["prep_gemini_db", "delayed_bam_merge"], samples,
                             dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Example #22
0
    def run(self, config, config_file, run_parallel, parallel, dirs, samples):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["process_alignment", "postprocess_alignment"],
                             samples, dirs, config,
                             multiplier=alignprep.parallel_multiplier(samples)) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel("prep_align_inputs", samples)
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = alignprep.merge_split_alignments(samples, run_parallel)
            samples = disambiguate.resolve(samples, run_parallel)
            samples = run_parallel("postprocess_alignment", samples)
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with global_parallel(parallel, "full", ["piped_bamprep", "variantcall_sample"],
                             samples, dirs, config,
                             multiplier=len(regions["analysis"]), max_multicore=1) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions, run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with global_parallel(parallel, "multicore2", ["prep_gemini_db", "delayed_bam_merge"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: structural variation")
            samples = structural.run(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Example #23
0
 def run(self, config, config_file, parallel, dirs, lane_items):
     ## Alignment and preparation requiring the entire input file (multicore cluster)
     with prun.start(_wprogs(parallel, ["aligner"]), lane_items, config, dirs, "multicore") as run_parallel:
         logger.info("Timing: alignment")
         samples = run_parallel("process_alignment", lane_items)
     ## Finalize (per-sample cluster)
     with prun.start(_wprogs(parallel, ["fastqc", "bamtools"]), samples, config, dirs, "persample") as run_parallel:
         logger.info("Timing: quality control")
         samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Example #24
0
    def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
        lane_items = run_parallel("trim_lane", lane_items)
        samples = disambiguate.split(lane_items)
        samples = run_parallel("process_alignment", samples)
        samples = disambiguate.resolve(samples, run_parallel)
        samples = run_parallel("generate_transcript_counts", samples)
        combined = combine_count_files([x[0].get("count_file") for x in samples])
        for x in samples:
            x[0]["combined_counts"] = combined

        samples = qcsummary.generate_parallel(samples, run_parallel)
        #run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
        return samples
Example #25
0
 def run(self, config, config_file, parallel, dirs, lane_items):
     ## Alignment and preparation requiring the entire input file (multicore cluster)
     with prun.start(_wres(parallel, ["aligner"]), lane_items, config, dirs,
                     "multicore") as run_parallel:
         logger.info("Timing: alignment")
         samples = run_parallel("process_alignment", lane_items)
     ## Finalize (per-sample cluster)
     with prun.start(_wres(parallel, ["fastqc", "bamtools"]), samples,
                     config, dirs, "persample") as run_parallel:
         logger.info("Timing: quality control")
         samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Example #26
0
 def run(self, config, config_file, parallel, dirs, samples):
     with prun.start(_wres(parallel, ["aligner", "picard"]),
                     samples, config, dirs, "multicore",
                     multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
         samples = run_parallel("process_lane", samples)
         samples = run_parallel("trim_lane", samples)
         samples = disambiguate.split(samples)
         samples = run_parallel("process_alignment", samples)
     with prun.start(_wres(parallel, ["picard", "fastqc"]),
                     samples, config, dirs, "persample") as run_parallel:
         samples = run_parallel("clean_chipseq_alignment", samples)
         samples = qcsummary.generate_parallel(samples, run_parallel)
     return samples
Example #27
0
 def run(self, config, config_file, parallel, dirs, samples):
     with prun.start(_wres(parallel, ["aligner", "picard"]),
                     samples, config, dirs, "multicore",
                     multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
         samples = run_parallel("process_lane", samples)
         samples = run_parallel("trim_lane", samples)
         samples = disambiguate.split(samples)
         samples = run_parallel("process_alignment", samples)
     with prun.start(_wres(parallel, ["picard", "fastqc"]),
                     samples, config, dirs, "persample") as run_parallel:
         samples = run_parallel("clean_chipseq_alignment", samples)
         samples = qcsummary.generate_parallel(samples, run_parallel)
     return samples
Example #28
0
 def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
     ## Alignment and preparation requiring the entire input file (multicore cluster)
     with global_parallel(parallel, "multicore", ["align_prep_full"], lane_items, dirs, config) as parallel:
         run_parallel = parallel_runner(parallel, dirs, config)
         logger.info("Timing: alignment")
         samples = run_parallel("process_alignment", lane_items)
     ## Finalize (per-sample cluster)
     with global_parallel(parallel, "persample", ["postprocess_variants"], samples, dirs, config) as parallel:
         run_parallel = parallel_runner(parallel, dirs, config)
         logger.info("Timing: quality control")
         samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Example #29
0
    def run(self, config, run_info_yaml, parallel, dirs, samples):
        # causes a circular import at the top level
        from bcbio.srna.group import report as srna_report

        with prun.start(_wres(parallel, ["picard", "cutadapt"]), samples,
                        config, dirs, "trimming") as run_parallel:
            with profile.report("organize samples", dirs):
                samples = run_parallel("organize_samples", [[
                    dirs, config, run_info_yaml,
                    [x[0]["description"] for x in samples]
                ]])
            with profile.report("adapter trimming", dirs):
                samples = run_parallel("prepare_sample", samples)
                samples = run_parallel("trim_srna_sample", samples)

        with prun.start(
                _wres(parallel, ["aligner", "picard", "samtools"],
                      ensure_mem={
                          "bowtie": 8,
                          "bowtie2": 8,
                          "star": 2
                      }), [samples[0]], config, dirs,
                "alignment") as run_parallel:
            with profile.report("prepare", dirs):
                samples = run_parallel("seqcluster_prepare", [samples])
            with profile.report("alignment", dirs):
                samples = run_parallel("srna_alignment", [samples])

        with prun.start(_wres(parallel, ["picard", "miraligner"]), samples,
                        config, dirs, "annotation") as run_parallel:
            with profile.report("small RNA annotation", dirs):
                samples = run_parallel("srna_annotation", samples)

        with prun.start(
                _wres(parallel, ["seqcluster"], ensure_mem={"seqcluster": 8}),
            [samples[0]], config, dirs, "cluster") as run_parallel:
            with profile.report("cluster", dirs):
                samples = run_parallel("seqcluster_cluster", [samples])

        with prun.start(_wres(parallel, ["picard", "fastqc"]), samples, config,
                        dirs, "qc") as run_parallel:
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
            with profile.report("report", dirs):
                srna_report(samples)
            with profile.report("upload", dirs):
                samples = run_parallel("upload_samples", samples)
                for sample in samples:
                    run_parallel("upload_samples_project", [sample])

        return samples
Example #30
0
def fastrnaseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    samples = rnaseq_prep_samples(config, run_info_yaml, parallel, dirs, samples)
    with prun.start(_wres(parallel, ["samtools"]), samples, config,
                    dirs, "fastrnaseq") as run_parallel:
        with profile.report("fastrnaseq", dirs):
            samples = rnaseq.fast_rnaseq(samples, run_parallel)
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for samples in samples:
                run_parallel("upload_samples_project", [samples])
    logger.info("Timing: finished")
    return samples
Example #31
0
def singlecellrnaseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    samples = rnaseq_prep_samples(config, run_info_yaml, parallel, dirs, samples)
    with prun.start(_wres(parallel, ["samtools", "rapmap"]), samples, config,
                    dirs, "singlecell-rnaseq") as run_parallel:
        with profile.report("singlecell-rnaseq", dirs):
            samples = rnaseq.singlecell_rnaseq(samples, run_parallel)
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for samples in samples:
                run_parallel("upload_samples_project", [samples])
    logger.info("Timing: finished")
    return samples
Example #32
0
    def run(self, config, run_info_yaml, parallel, dirs, samples):
        with prun.start(_wres(parallel, ["picard", "cutadapt"]), samples, config, dirs, "trimming") as run_parallel:
            with profile.report("organize samples", dirs):
                samples = run_parallel(
                    "organize_samples", [[dirs, config, run_info_yaml, [x[0]["description"] for x in samples]]]
                )
            with profile.report("adapter trimming", dirs):
                samples = run_parallel("prepare_sample", samples)
                samples = run_parallel("trim_sample", samples)
        with prun.start(
            _wres(parallel, ["aligner", "picard"], ensure_mem={"tophat": 8, "tophat2": 8, "star": 2}),
            samples,
            config,
            dirs,
            "alignment",
            multiplier=alignprep.parallel_multiplier(samples),
        ) as run_parallel:
            with profile.report("alignment", dirs):
                samples = run_parallel("disambiguate_split", [samples])
                samples = run_parallel("process_alignment", samples)
        with prun.start(
            _wres(parallel, ["samtools", "cufflinks"]), samples, config, dirs, "rnaseqcount"
        ) as run_parallel:
            with profile.report("disambiguation", dirs):
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("transcript assembly", dirs):
                samples = rnaseq.assemble_transcripts(run_parallel, samples)
            with profile.report("estimate expression (threaded)", dirs):
                samples = rnaseq.quantitate_expression_parallel(samples, run_parallel)
        with prun.start(
            _wres(parallel, ["dexseq", "express"]), samples, config, dirs, "rnaseqcount-singlethread", max_multicore=1
        ) as run_parallel:
            with profile.report("estimate expression (single threaded)", dirs):
                samples = rnaseq.quantitate_expression_noparallel(samples, run_parallel)
        samples = rnaseq.combine_files(samples)
        with prun.start(_wres(parallel, ["gatk"]), samples, config, dirs, "rnaseq-variation") as run_parallel:
            with profile.report("RNA-seq variant calling", dirs):
                samples = rnaseq.rnaseq_variant_calling(samples, run_parallel)

        with prun.start(
            _wres(parallel, ["picard", "fastqc", "rnaseqc", "kraken"]), samples, config, dirs, "qc"
        ) as run_parallel:
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
            with profile.report("upload", dirs):
                for sample in samples:
                    run_parallel("upload_samples", [sample])
        logger.info("Timing: finished")
        return samples
Example #33
0
    def run(self, config, config_file, run_parallel, parallel, dirs,
            lane_items):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["align_prep_full"],
                             lane_items, dirs["work"], config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel(
                "align_prep_full",
                [list(x) + [config_file] for x in lane_items])
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)

        ## Variant calling on sub-regions of the input file (full cluster)
        with global_parallel(
                parallel,
                "full", ["piped_bamprep", "variantcall_sample"],
                samples,
                dirs["work"],
                config,
                multiplier=len(regions["analysis"])) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions,
                                                  run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs["work"], config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = validate.summarize_grading(samples)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
            logger.info("Timing: finished")
        return samples
Example #34
0
 def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
     ## Alignment and preparation requiring the entire input file (multicore cluster)
     with global_parallel(parallel, "multicore", ["process_alignment"],
                          lane_items, dirs, config) as parallel:
         run_parallel = parallel_runner(parallel, dirs, config)
         logger.info("Timing: alignment")
         samples = run_parallel("process_alignment", lane_items)
     ## Finalize (per-sample cluster)
     with global_parallel(parallel, "persample", ["postprocess_variants"],
                          samples, dirs, config) as parallel:
         run_parallel = parallel_runner(parallel, dirs, config)
         logger.info("Timing: quality control")
         samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Example #35
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("prep_recal", samples)
     samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
     samples = parallel_realign_sample(samples, run_parallel)
     samples = parallel_variantcall(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = run_parallel("detect_sv", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     return samples
Example #36
0
def fastrnaseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    samples = rnaseq_prep_samples(config, run_info_yaml, parallel, dirs, samples)
    ww = initialize_watcher(samples)
    with prun.start(_wres(parallel, ["samtools"]), samples, config,
                    dirs, "fastrnaseq") as run_parallel:
        with profile.report("fastrnaseq", dirs):
            samples = rnaseq.fast_rnaseq(samples, run_parallel)
            ww.report("fastrnaseq", samples)
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
            ww.report("qcsummary", samples)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for samples in samples:
                run_parallel("upload_samples_project", [samples])
    logger.info("Timing: finished")
    return samples
Example #37
0
 def run(self, config, config_file, parallel, dirs, lane_items):
     ## Alignment and preparation requiring the entire input file (multicore cluster)
     with prun.start(_wres(parallel, ["aligner"]),
                     lane_items, config, dirs, "multicore") as run_parallel:
         with profile.report("alignment", dirs):
             samples = run_parallel("process_alignment", lane_items)
         with profile.report("callable regions", dirs):
             samples = run_parallel("postprocess_alignment", samples)
             samples = run_parallel("combine_sample_regions", [samples])
             samples = region.clean_sample_data(samples)
     ## Quality control
     with prun.start(_wres(parallel, ["fastqc", "bamtools", "samtools"]),
                     samples, config, dirs, "multicore2") as run_parallel:
         with profile.report("quality control", dirs):
             samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Example #38
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     # Handle alignment and preparation requiring the entire input file
     samples = run_parallel("align_prep_full", (list(x) + [config_file] for x in lane_items))
     regions = callable.combine_sample_regions(samples)
     samples = region.add_region_info(samples, regions)
     # Handle all variant calling on sub-regions of the input file
     samples = region.clean_sample_data(samples)
     samples = region.parallel_prep_region(samples, regions, run_parallel)
     samples = region.parallel_variantcall_region(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = population.prep_db_parallel(samples, run_parallel)
     samples = region.delayed_bamprep_merge(samples, run_parallel)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     samples = validate.summarize_grading(samples)
     return samples
Example #39
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("prep_recal", samples)
     samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
     samples = parallel_realign_sample(samples, run_parallel)
     samples = parallel_variantcall(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = run_parallel("detect_sv", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     return samples
Example #40
0
def rnaseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    samples = rnaseq_prep_samples(config, run_info_yaml, parallel, dirs, samples)
    with prun.start(_wres(parallel, ["aligner", "picard", "samtools"],
                            ensure_mem={"tophat": 10, "tophat2": 10, "star": 2, "hisat2": 8}),
                    samples, config, dirs, "alignment",
                    multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("alignment", dirs):
            samples = run_parallel("disambiguate_split", [samples])
            samples = run_parallel("process_alignment", samples)
    with prun.start(_wres(parallel, ["samtools", "cufflinks"]),
                    samples, config, dirs, "rnaseqcount") as run_parallel:
        with profile.report("disambiguation", dirs):
            samples = disambiguate.resolve(samples, run_parallel)
        with profile.report("transcript assembly", dirs):
            samples = rnaseq.assemble_transcripts(run_parallel, samples)
        with profile.report("estimate expression (threaded)", dirs):
            samples = rnaseq.quantitate_expression_parallel(samples, run_parallel)

    with prun.start(_wres(parallel, ["ericscript"]), samples, config,
                    dirs, "fusion-standalone-callers") as run_parallel:
        with profile.report("Detect gene fusions", dirs):
            rnaseq.detect_fusions(samples)

    with prun.start(_wres(parallel, ["dexseq", "express"]), samples, config,
                    dirs, "rnaseqcount-singlethread", max_multicore=1) as run_parallel:
        with profile.report("estimate expression (single threaded)", dirs):
            samples = rnaseq.quantitate_expression_noparallel(samples, run_parallel)


    samples = rnaseq.combine_files(samples)
    with prun.start(_wres(parallel, ["gatk"]), samples, config,
                    dirs, "rnaseq-variation") as run_parallel:
        with profile.report("RNA-seq variant calling", dirs):
            samples = rnaseq.rnaseq_variant_calling(samples, run_parallel)

    with prun.start(_wres(parallel, ["samtools", "fastqc", "qualimap",
                                     "kraken", "gatk", "preseq"], ensure_mem={"qualimap": 4}),
                    samples, config, dirs, "qc") as run_parallel:
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])
    logger.info("Timing: finished")
    return samples
Example #41
0
def wgbsseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    with prun.start(
            _wres(parallel, ["fastqc", "picard"], ensure_mem={"fastqc": 4}),
            samples, config, dirs, "trimming") as run_parallel:
        with profile.report("organize samples", dirs):
            samples = run_parallel("organize_samples", [[
                dirs, config, run_info_yaml,
                [x[0]["description"] for x in samples]
            ]])
            samples = run_parallel("prepare_sample", samples)
            samples = run_parallel("trim_bs_sample", samples)

    with prun.start(
            _wres(parallel, ["aligner", "bismark", "picard", "samtools"]),
            samples,
            config,
            dirs,
            "multicore",
            multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("alignment", dirs):
            samples = run_parallel("process_alignment", samples)

    with prun.start(_wres(parallel, ['samtools']), samples, config, dirs,
                    'deduplication') as run_parallel:
        with profile.report('deduplicate', dirs):
            samples = run_parallel('deduplicate_bismark', samples)

    with prun.start(_wres(parallel, ["caller"], ensure_mem={"caller": 5}),
                    samples,
                    config,
                    dirs,
                    "multicore2",
                    multiplier=24) as run_parallel:
        with profile.report("cpg calling", dirs):
            samples = run_parallel("cpg_calling", samples)

    with prun.start(_wres(parallel, ["picard", "fastqc", "samtools"]), samples,
                    config, dirs, "qc") as run_parallel:
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])
    logger.info("Timing: finished")
    return samples
Example #42
0
    def run(self, config, config_file, parallel, dirs, samples):
        with prun.start(_wres(parallel, ["picard", "AlienTrimmer"]), samples,
                        config, dirs, "trimming") as run_parallel:
            with profile.report("adapter trimming", dirs):
                samples = run_parallel("process_lane", samples)
                samples = run_parallel("trim_lane", samples)
        with prun.start(_wres(parallel, ["aligner", "picard"],
                              ensure_mem={
                                  "tophat": 8,
                                  "tophat2": 8,
                                  "star": 40
                              }),
                        samples,
                        config,
                        dirs,
                        "multicore",
                        multiplier=alignprep.parallel_multiplier(
                            samples)) as run_parallel:
            with profile.report("alignment", dirs):
                samples = disambiguate.split(samples)
                samples = run_parallel("process_alignment", samples)

        with prun.start(_wres(parallel, ["samtools", "cufflinks"]), samples,
                        config, dirs, "rnaseqcount") as run_parallel:
            with profile.report("disambiguation", dirs):
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("estimate expression", dirs):
                samples = rnaseq.estimate_expression(samples, run_parallel)

        combined = combine_count_files(
            [x[0].get("count_file") for x in samples])
        gtf_file = utils.get_in(samples[0][0],
                                ('genome_resources', 'rnaseq', 'transcripts'),
                                None)
        annotated = annotate_combined_count_file(combined, gtf_file)
        for x in samples:
            x[0]["combined_counts"] = combined
            if annotated:
                x[0]["annotated_combined_counts"] = annotated

        with prun.start(_wres(parallel, ["picard", "fastqc", "rnaseqc"]),
                        samples, config, dirs, "persample") as run_parallel:
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Example #43
0
 def run(self, config, config_file, parallel, dirs, samples):
     ## Alignment and preparation requiring the entire input file (multicore cluster)
     with prun.start(_wres(parallel, ["aligner"]),
                     samples, config, dirs, "multicore") as run_parallel:
         with profile.report("alignment", dirs):
             samples = run_parallel("process_alignment", samples)
         with profile.report("callable regions", dirs):
             samples = run_parallel("postprocess_alignment", samples)
             samples = run_parallel("combine_sample_regions", [samples])
             samples = region.clean_sample_data(samples)
     ## Quality control
     with prun.start(_wres(parallel, ["fastqc", "bamtools", "samtools"]),
                     samples, config, dirs, "multicore2") as run_parallel:
         with profile.report("quality control", dirs):
             samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Example #44
0
    def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
        lane_items = run_parallel("trim_lane", lane_items)
        samples = disambiguate.split(lane_items)
        samples = run_parallel("process_alignment", samples)
        samples = disambiguate.resolve(samples, run_parallel)
        samples = rnaseq.estimate_expression(samples, run_parallel)
        #samples = rnaseq.detect_fusion(samples, run_parallel)
        combined = combine_count_files([x[0].get("count_file") for x in samples])
        organism = utils.get_in(samples[0][0], ('genome_resources', 'aliases',
                                                'ensembl'), None)
        annotated = annotate_combined_count_file(combined, organism)
        for x in samples:
            x[0]["combined_counts"] = combined
            x[0]["annotated_combined_counts"] = annotated

        samples = qcsummary.generate_parallel(samples, run_parallel)
        #run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
        return samples
Example #45
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     # Handle alignment and preparation requiring the entire input file
     samples = run_parallel("align_prep_full",
                            (list(x) + [config_file] for x in lane_items))
     regions = callable.combine_sample_regions(samples)
     samples = region.add_region_info(samples, regions)
     # Handle all variant calling on sub-regions of the input file
     samples = region.clean_sample_data(samples)
     samples = region.parallel_prep_region(samples, regions, run_parallel)
     samples = region.parallel_variantcall_region(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = population.prep_db_parallel(samples, run_parallel)
     samples = region.delayed_bamprep_merge(samples, run_parallel)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     samples = validate.summarize_grading(samples)
     return samples
Example #46
0
def smallrnaseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    # causes a circular import at the top level
    from bcbio.srna.group import report as srna_report

    samples = rnaseq_prep_samples(config, run_info_yaml, parallel, dirs, samples)

    with prun.start(_wres(parallel, ["aligner", "picard", "samtools"],
                          ensure_mem={"bowtie": 8, "bowtie2": 8, "star": 2}),
                    [samples[0]], config, dirs, "alignment") as run_parallel:
        with profile.report("prepare", dirs):
            samples = run_parallel("seqcluster_prepare", [samples])
        with profile.report("seqcluster alignment", dirs):
            samples = run_parallel("srna_alignment", [samples])

    with prun.start(_wres(parallel, ["aligner", "picard", "samtools"],
                            ensure_mem={"tophat": 10, "tophat2": 10, "star": 2, "hisat2": 8}),
                    samples, config, dirs, "alignment_samples",
                    multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("alignment", dirs):
            samples = run_parallel("process_alignment", samples)

    with prun.start(_wres(parallel, ["picard", "miraligner"]),
                    samples, config, dirs, "annotation") as run_parallel:
        with profile.report("small RNA annotation", dirs):
            samples = run_parallel("srna_annotation", samples)

    with prun.start(_wres(parallel, ["seqcluster", "mirge"],
                          ensure_mem={"seqcluster": 8}),
                    [samples[0]], config, dirs, "cluster") as run_parallel:
        with profile.report("cluster", dirs):
            samples = run_parallel("seqcluster_cluster", [samples])

    with prun.start(_wres(parallel, ["picard", "fastqc"]),
                    samples, config, dirs, "qc") as run_parallel:
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("report", dirs):
            srna_report(samples)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])

    return samples
Example #47
0
    def run(self, config, run_info_yaml, parallel, dirs, samples):
        # causes a circular import at the top level
        from bcbio.srna.group import report as srna_report

        with prun.start(_wres(parallel, ["picard", "cutadapt"]),
                        samples, config, dirs, "trimming") as run_parallel:
            with profile.report("organize samples", dirs):
                samples = run_parallel("organize_samples", [[dirs, config, run_info_yaml,
                                                             [x[0]["description"] for x in samples]]])
            with profile.report("adapter trimming", dirs):
                samples = run_parallel("prepare_sample", samples)
                samples = run_parallel("trim_srna_sample", samples)

        with prun.start(_wres(parallel, ["aligner", "picard", "samtools"],
                              ensure_mem={"bowtie": 8, "bowtie2": 8, "star": 2}),
                        [samples[0]], config, dirs, "alignment") as run_parallel:
            with profile.report("prepare", dirs):
                samples = run_parallel("seqcluster_prepare", [samples])
            with profile.report("alignment", dirs):
                samples = run_parallel("srna_alignment", [samples])

        with prun.start(_wres(parallel, ["picard", "miraligner"]),
                        samples, config, dirs, "annotation") as run_parallel:
            with profile.report("small RNA annotation", dirs):
                samples = run_parallel("srna_annotation", samples)

        with prun.start(_wres(parallel, ["seqcluster"],
                              ensure_mem={"seqcluster": 8}),
                        [samples[0]], config, dirs, "cluster") as run_parallel:
            with profile.report("cluster", dirs):
                samples = run_parallel("seqcluster_cluster", [samples])

        with prun.start(_wres(parallel, ["picard", "fastqc"]),
                        samples, config, dirs, "qc") as run_parallel:
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
            with profile.report("report", dirs):
                srna_report(samples)
            with profile.report("upload", dirs):
                samples = run_parallel("upload_samples", samples)
                for sample in samples:
                    run_parallel("upload_samples_project", [sample])

        return samples
Example #48
0
 def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
     raise NotImplementedError("`variant` processing is deprecated: please use `variant2`"
                               "The next version will alias variant to the new variant2 pipeline")
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("prep_recal", samples)
     samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
     samples = parallel_realign_sample(samples, run_parallel)
     samples = parallel_variantcall(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = run_parallel("detect_sv", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     return samples
Example #49
0
 def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
     raise NotImplementedError("`variant` processing is deprecated: please use `variant2`"
                               "The next version will alias variant to the new variant2 pipeline")
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("prep_recal", samples)
     samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
     samples = parallel_realign_sample(samples, run_parallel)
     samples = parallel_variantcall(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = run_parallel("detect_sv", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     return samples
Example #50
0
def rnaseqpipeline(config, run_info_yaml, parallel, dirs, samples):
    samples = rnaseq_prep_samples(config, run_info_yaml, parallel, dirs,
                                  samples)
    with prun.start(
            _wres(parallel, ["aligner", "picard", "samtools"],
                  ensure_mem={
                      "tophat": 10,
                      "tophat2": 10,
                      "star": 2,
                      "hisat2": 8
                  }),
            samples,
            config,
            dirs,
            "alignment",
            multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("alignment", dirs):
            samples = run_parallel("disambiguate_split", [samples])
            samples = run_parallel("process_alignment", samples)
    with prun.start(_wres(parallel, ["samtools", "cufflinks"]), samples,
                    config, dirs, "rnaseqcount") as run_parallel:
        with profile.report("disambiguation", dirs):
            samples = disambiguate.resolve(samples, run_parallel)
        with profile.report("transcript assembly", dirs):
            samples = rnaseq.assemble_transcripts(run_parallel, samples)
        with profile.report("estimate expression (threaded)", dirs):
            samples = rnaseq.quantitate_expression_parallel(
                samples, run_parallel)

    with prun.start(_wres(parallel, ["dexseq", "express"]),
                    samples,
                    config,
                    dirs,
                    "rnaseqcount-singlethread",
                    max_multicore=1) as run_parallel:
        with profile.report("estimate expression (single threaded)", dirs):
            samples = rnaseq.quantitate_expression_noparallel(
                samples, run_parallel)

    samples = rnaseq.combine_files(samples)
    with prun.start(_wres(parallel, ["gatk", "vardict"]), samples, config,
                    dirs, "rnaseq-variation") as run_parallel:
        with profile.report("RNA-seq variant calling", dirs):
            samples = rnaseq.rnaseq_variant_calling(samples, run_parallel)

    with prun.start(
            _wres(
                parallel,
                ["samtools", "fastqc", "qualimap", "kraken", "gatk", "preseq"],
                ensure_mem={"qualimap": 4}), samples, config, dirs,
            "qc") as run_parallel:
        with profile.report("quality control", dirs):
            samples = qcsummary.generate_parallel(samples, run_parallel)
        with profile.report("create SummarizedExperiment object", dirs):
            samples = rnaseq.load_summarizedexperiment(samples)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])
        with profile.report("bcbioRNAseq loading", dirs):
            tools_on = dd.get_in_samples(samples, dd.get_tools_on)
            bcbiornaseq_on = tools_on and "bcbiornaseq" in tools_on
            if bcbiornaseq_on:
                if len(samples) < 3:
                    logger.warn(
                        "bcbioRNASeq needs at least three samples total, skipping."
                    )
                elif len(samples) > 100:
                    logger.warn("Over 100 samples, skipping bcbioRNASeq.")
                else:
                    run_parallel("run_bcbiornaseqload", [sample])
    logger.info("Timing: finished")
    return samples
Example #51
0
def variant2pipeline(config, run_info_yaml, parallel, dirs, samples):
    ## Alignment and preparation requiring the entire input file (multicore cluster)
    with prun.start(
            _wres(
                parallel, ["aligner", "samtools", "sambamba"],
                (["reference", "fasta"], ["reference", "aligner"], ["files"])),
            samples,
            config,
            dirs,
            "multicore",
            multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("organize samples", dirs):
            samples = run_parallel("organize_samples", [[
                dirs, config, run_info_yaml,
                [x[0]["description"] for x in samples]
            ]])
        ww = initialize_watcher(samples)
        with profile.report("alignment preparation", dirs):
            samples = run_parallel("prep_align_inputs", samples)
            ww.report("prep_align_inputs", samples)
            samples = run_parallel("disambiguate_split", [samples])
        with profile.report("alignment", dirs):
            samples = run_parallel("process_alignment", samples)
            ww.report("process_alignment", samples)
            samples = disambiguate.resolve(samples, run_parallel)
            samples = alignprep.merge_split_alignments(samples, run_parallel)
        with profile.report("callable regions", dirs):
            samples = run_parallel("prep_samples", [samples])
            ww.report("prep_samples", samples)
            samples = run_parallel("postprocess_alignment", samples)
            ww.report("postprocess_alignment", samples)
            samples = run_parallel("combine_sample_regions", [samples])
            samples = region.clean_sample_data(samples)
            ww.report("combine_sample_regions", samples)
        with profile.report("hla typing", dirs):
            samples = hla.run(samples, run_parallel)
            ww.report("call_hla", samples)

    ## Variant calling on sub-regions of the input file (full cluster)
    with prun.start(_wres(parallel, ["gatk", "picard", "variantcaller"]),
                    samples,
                    config,
                    dirs,
                    "full",
                    multiplier=region.get_max_counts(samples),
                    max_multicore=1) as run_parallel:
        with profile.report("alignment post-processing", dirs):
            samples = region.parallel_prep_region(samples, run_parallel)
        with profile.report("variant calling", dirs):
            samples = genotype.parallel_variantcall_region(
                samples, run_parallel)

    ## Finalize variants, BAMs and population databases (per-sample multicore cluster)
    with prun.start(_wres(parallel, [
            "gatk", "gatk-vqsr", "snpeff", "bcbio_variation", "gemini",
            "samtools", "fastqc", "sambamba", "bcbio-variation-recall",
            "qsignature", "svcaller"
    ]),
                    samples,
                    config,
                    dirs,
                    "multicore2",
                    multiplier=structural.parallel_multiplier(
                        samples)) as run_parallel:
        with profile.report("joint squaring off/backfilling", dirs):
            samples = joint.square_off(samples, run_parallel)
        with profile.report("variant post-processing", dirs):
            samples = run_parallel("postprocess_variants", samples)
            samples = run_parallel("split_variants_by_sample", samples)
        with profile.report("prepped BAM merging", dirs):
            samples = region.delayed_bamprep_merge(samples, run_parallel)
        with profile.report("validation", dirs):
            samples = run_parallel("compare_to_rm", samples)
            samples = genotype.combine_multiple_callers(samples)
        with profile.report("ensemble calling", dirs):
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
        with profile.report("validation summary", dirs):
            samples = validate.summarize_grading(samples)
        with profile.report("structural variation precall", dirs):
            samples = structural.run(samples, run_parallel, "precall")
        with profile.report("structural variation", dirs):
            samples = structural.run(samples, run_parallel, "initial")
        with profile.report("structural variation", dirs):
            samples = structural.run(samples, run_parallel, "standard")
        with profile.report("structural variation ensemble", dirs):
            samples = structural.run(samples, run_parallel, "ensemble")
        with profile.report("structural variation validation", dirs):
            samples = run_parallel("validate_sv", samples)
        with profile.report("heterogeneity", dirs):
            samples = heterogeneity.run(samples, run_parallel)
        with profile.report("population database", dirs):
            samples = population.prep_db_parallel(samples, run_parallel)
        with profile.report("quality control", dirs):
            ww.report("pre_qc", samples)
            samples = qcsummary.generate_parallel(samples, run_parallel)
            ww.report("qc_summary", samples)
        with profile.report("archive", dirs):
            samples = archive.compress(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])
    logger.info("Timing: finished")
    return samples
Example #52
0
def variant2pipeline(config, run_info_yaml, parallel, dirs, samples):
    ## Alignment and preparation requiring the entire input file (multicore cluster)
    with prun.start(_wres(parallel, ["aligner", "samtools", "sambamba"],
                            (["reference", "fasta"], ["reference", "aligner"], ["files"])),
                    samples, config, dirs, "multicore",
                    multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
        with profile.report("organize samples", dirs):
            samples = run_parallel("organize_samples", [[dirs, config, run_info_yaml,
                                                            [x[0]["description"] for x in samples]]])
        ww = WorldWatcher(dirs["work"], is_on=any([dd.get_cwl_reporting(d[0]) for d in samples]))
        ww.initialize(samples)
        with profile.report("alignment preparation", dirs):
            samples = run_parallel("prep_align_inputs", samples)
            ww.report("prep_align_inputs", samples)
            samples = run_parallel("disambiguate_split", [samples])
        with profile.report("alignment", dirs):
            samples = run_parallel("process_alignment", samples)
            ww.report("process_alignment", samples)
            samples = disambiguate.resolve(samples, run_parallel)
            samples = alignprep.merge_split_alignments(samples, run_parallel)
        with profile.report("callable regions", dirs):
            samples = run_parallel("prep_samples", [samples])
            ww.report("prep_samples", samples)
            samples = run_parallel("postprocess_alignment", samples)
            ww.report("postprocess_alignment", samples)
            samples = run_parallel("combine_sample_regions", [samples])
            samples = region.clean_sample_data(samples)
            ww.report("combine_sample_regions", samples)
        with profile.report("structural variation initial", dirs):
            samples = structural.run(samples, run_parallel, "initial")
            ww.report("sv_initial", samples)
        with profile.report("hla typing", dirs):
            samples = hla.run(samples, run_parallel)
            ww.report("call_hla", samples)

    ## Variant calling on sub-regions of the input file (full cluster)
    with prun.start(_wres(parallel, ["gatk", "picard", "variantcaller"]),
                    samples, config, dirs, "full",
                    multiplier=region.get_max_counts(samples), max_multicore=1) as run_parallel:
        with profile.report("alignment post-processing", dirs):
            samples = region.parallel_prep_region(samples, run_parallel)
        with profile.report("variant calling", dirs):
            samples = genotype.parallel_variantcall_region(samples, run_parallel)

    ## Finalize variants, BAMs and population databases (per-sample multicore cluster)
    with prun.start(_wres(parallel, ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation",
                                     "gemini", "samtools", "fastqc", "bamtools",
                                     "bcbio-variation-recall", "qsignature",
                                     "svcaller"]),
                    samples, config, dirs, "multicore2",
                    multiplier=structural.parallel_multiplier(samples)) as run_parallel:
        with profile.report("joint squaring off/backfilling", dirs):
            samples = joint.square_off(samples, run_parallel)
        with profile.report("variant post-processing", dirs):
            samples = run_parallel("postprocess_variants", samples)
            samples = run_parallel("split_variants_by_sample", samples)
        with profile.report("prepped BAM merging", dirs):
            samples = region.delayed_bamprep_merge(samples, run_parallel)
        with profile.report("validation", dirs):
            samples = run_parallel("compare_to_rm", samples)
            samples = genotype.combine_multiple_callers(samples)
        with profile.report("ensemble calling", dirs):
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
        with profile.report("validation summary", dirs):
            samples = validate.summarize_grading(samples)
        with profile.report("structural variation final", dirs):
            samples = structural.run(samples, run_parallel, "standard")
        with profile.report("structural variation ensemble", dirs):
            samples = structural.run(samples, run_parallel, "ensemble")
        with profile.report("structural variation validation", dirs):
            samples = run_parallel("validate_sv", samples)
        with profile.report("heterogeneity", dirs):
            samples = heterogeneity.run(samples, run_parallel)
        with profile.report("population database", dirs):
            samples = population.prep_db_parallel(samples, run_parallel)
        with profile.report("quality control", dirs):
            ww.report("pre_qc", samples)
            samples = qcsummary.generate_parallel(samples, run_parallel)
            ww.report("qc_summary", samples)
        with profile.report("archive", dirs):
            samples = archive.compress(samples, run_parallel)
        with profile.report("upload", dirs):
            samples = run_parallel("upload_samples", samples)
            for sample in samples:
                run_parallel("upload_samples_project", [sample])
    logger.info("Timing: finished")
    return samples