Exemple #1
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     # Handle alignment and preparation requiring the entire input file
     samples = run_parallel("align_prep_full", (list(x) + [config_file] for x in lane_items))
     samples = callable.combine_sample_regions(samples)
     # Handle all variant calling on sub-regions of the input file
     samples = region.parallel_prep_region(samples, run_parallel)
     samples = region.parallel_variantcall_region(samples, run_parallel)
     samples = combine_multiple_callers(samples)
     samples = run_parallel("combine_calls", samples)
     return samples
Exemple #2
0
    def run(self, config, config_file, run_parallel, parallel, dirs,
            lane_items):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["align_prep_full"],
                             lane_items, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel(
                "align_prep_full",
                [list(x) + [config_file] for x in lane_items])
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with global_parallel(parallel,
                             "full", ["piped_bamprep", "variantcall_sample"],
                             samples,
                             dirs,
                             config,
                             multiplier=len(regions["analysis"])) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions,
                                                  run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        ## Finalizing BAMs and population databases, handle multicore computation
        with global_parallel(parallel, "multicore2",
                             ["prep_gemini_db", "delayed_bam_merge"], samples,
                             dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Exemple #3
0
    def run(self, config, config_file, run_parallel, parallel, dirs, samples):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["process_alignment", "postprocess_alignment"],
                             samples, dirs, config,
                             multiplier=alignprep.parallel_multiplier(samples)) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel("prep_align_inputs", samples)
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = alignprep.merge_split_alignments(samples, run_parallel)
            samples = disambiguate.resolve(samples, run_parallel)
            samples = run_parallel("postprocess_alignment", samples)
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with global_parallel(parallel, "full", ["piped_bamprep", "variantcall_sample"],
                             samples, dirs, config,
                             multiplier=len(regions["analysis"]), max_multicore=1) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions, run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with global_parallel(parallel, "multicore2", ["prep_gemini_db", "delayed_bam_merge"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: structural variation")
            samples = structural.run(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Exemple #4
0
    def run(self, config, config_file, run_parallel, parallel, dirs,
            lane_items):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["align_prep_full"],
                             lane_items, dirs["work"], config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel(
                "align_prep_full",
                [list(x) + [config_file] for x in lane_items])
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)

        ## Variant calling on sub-regions of the input file (full cluster)
        with global_parallel(
                parallel,
                "full", ["piped_bamprep", "variantcall_sample"],
                samples,
                dirs["work"],
                config,
                multiplier=len(regions["analysis"])) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions,
                                                  run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs["work"], config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = validate.summarize_grading(samples)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
            logger.info("Timing: finished")
        return samples
Exemple #5
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     # Handle alignment and preparation requiring the entire input file
     samples = run_parallel("align_prep_full", (list(x) + [config_file] for x in lane_items))
     regions = callable.combine_sample_regions(samples)
     samples = region.add_region_info(samples, regions)
     # Handle all variant calling on sub-regions of the input file
     samples = region.clean_sample_data(samples)
     samples = region.parallel_prep_region(samples, regions, run_parallel)
     samples = region.parallel_variantcall_region(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = population.prep_db_parallel(samples, run_parallel)
     samples = region.delayed_bamprep_merge(samples, run_parallel)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     samples = validate.summarize_grading(samples)
     return samples
Exemple #6
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     # Handle alignment and preparation requiring the entire input file
     samples = run_parallel("align_prep_full",
                            (list(x) + [config_file] for x in lane_items))
     regions = callable.combine_sample_regions(samples)
     samples = region.add_region_info(samples, regions)
     # Handle all variant calling on sub-regions of the input file
     samples = region.clean_sample_data(samples)
     samples = region.parallel_prep_region(samples, regions, run_parallel)
     samples = region.parallel_variantcall_region(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = population.prep_db_parallel(samples, run_parallel)
     samples = region.delayed_bamprep_merge(samples, run_parallel)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     samples = validate.summarize_grading(samples)
     return samples
Exemple #7
0
def combine_sample_regions(*args):
    return callable.combine_sample_regions(*args)
def combine_sample_regions(*args):
    return callable.combine_sample_regions(*args)
Exemple #9
0
    def run(self, config, config_file, parallel, dirs, samples):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with prun.start(
            _wres(parallel, ["aligner", "gatk"], (["reference", "fasta"], ["reference", "aligner"], ["files"])),
            samples,
            config,
            dirs,
            "multicore",
            multiplier=alignprep.parallel_multiplier(samples),
        ) as run_parallel:
            logger.info("Timing: alignment")
            samples = run_parallel("prep_align_inputs", samples)
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = alignprep.merge_split_alignments(samples, run_parallel)
            samples = disambiguate.resolve(samples, run_parallel)
            samples = run_parallel("postprocess_alignment", samples)
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with prun.start(
            _wres(parallel, ["gatk", "picard", "variantcaller"]),
            samples,
            config,
            dirs,
            "full",
            multiplier=len(regions["analysis"]),
            max_multicore=1,
        ) as run_parallel:
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions, run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with prun.start(
            _wres(parallel, ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"]), samples, config, dirs, "persample"
        ) as run_parallel:
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with prun.start(
            _wres(parallel, ["gemini", "samtools", "fastqc", "bamtools", "bcbio_variation", "bcbio-variation-recall"]),
            samples,
            config,
            dirs,
            "multicore2",
        ) as run_parallel:
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
            logger.info("Timing: structural variation")
            samples = structural.run(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Exemple #10
0
    def run(self, config, config_file, parallel, dirs, samples):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with prun.start(_wres(
                parallel, ["aligner", "gatk"],
            (["reference", "fasta"], ["reference", "aligner"], ["files"])),
                        samples,
                        config,
                        dirs,
                        "multicore",
                        multiplier=alignprep.parallel_multiplier(
                            samples)) as run_parallel:
            logger.info("Timing: alignment")
            samples = run_parallel("prep_align_inputs", samples)
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = alignprep.merge_split_alignments(samples, run_parallel)
            samples = disambiguate.resolve(samples, run_parallel)
            samples = run_parallel("postprocess_alignment", samples)
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with prun.start(_wres(parallel, ["gatk", "picard", "variantcaller"]),
                        samples,
                        config,
                        dirs,
                        "full",
                        multiplier=len(regions["analysis"]),
                        max_multicore=1) as run_parallel:
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions,
                                                  run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with prun.start(
                _wres(parallel,
                      ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"]),
                samples, config, dirs, "persample") as run_parallel:
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with prun.start(
                _wres(parallel, [
                    "gemini", "samtools", "fastqc", "bamtools",
                    "bcbio_variation", "bcbio-variation-recall"
                ]), samples, config, dirs, "multicore2") as run_parallel:
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
            logger.info("Timing: structural variation")
            samples = structural.run(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples