Ejemplo n.º 1
0
 def run(self, config, config_file, parallel, dirs, lane_items):
     ## Alignment and preparation requiring the entire input file (multicore cluster)
     with prun.start(_wres(parallel, ["aligner"]),
                     lane_items, config, dirs, "multicore") as run_parallel:
         with profile.report("alignment", dirs):
             samples = run_parallel("process_alignment", lane_items)
         with profile.report("callable regions", dirs):
             samples = run_parallel("postprocess_alignment", samples)
             regions = run_parallel("combine_sample_regions", [samples])[0]
             samples = region.add_region_info(samples, regions)
             samples = region.clean_sample_data(samples)
     ## Processing on sub regions
     with prun.start(_wres(parallel, ["gatk", "picard", "samtools"]),
                     samples, config, dirs, "full",
                     multiplier=len(regions["analysis"]), max_multicore=1) as run_parallel:
         with profile.report("alignment post-processing", dirs):
             samples = region.parallel_prep_region(samples, regions, run_parallel)
             samples = region.parallel_variantcall_region(samples, run_parallel)
     print len(samples)
     ## Finalize BAMs and QC
     with prun.start(_wres(parallel, ["fastqc", "bamtools", "samtools"]),
                     samples, config, dirs, "multicore2") as run_parallel:
         with profile.report("prepped BAM merging", dirs):
             samples = region.delayed_bamprep_merge(samples, run_parallel)
         print len(samples)
         with profile.report("quality control", dirs):
             samples = qcsummary.generate_parallel(samples, run_parallel)
     logger.info("Timing: finished")
     return samples
Ejemplo n.º 2
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     # Handle alignment and preparation requiring the entire input file
     samples = run_parallel("align_prep_full", (list(x) + [config_file] for x in lane_items))
     samples = callable.combine_sample_regions(samples)
     # Handle all variant calling on sub-regions of the input file
     samples = region.parallel_prep_region(samples, run_parallel)
     samples = region.parallel_variantcall_region(samples, run_parallel)
     samples = combine_multiple_callers(samples)
     samples = run_parallel("combine_calls", samples)
     return samples
Ejemplo n.º 3
0
    def run(self, config, config_file, parallel, dirs, samples):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with prun.start(_wres(parallel, ["aligner", "gatk"],
                              (["reference", "fasta"], ["reference", "aligner"], ["files"])),
                        samples, config, dirs, "multicore",
                        multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:
            with profile.report("alignment preparation", dirs):
                samples = run_parallel("prep_align_inputs", samples)
                samples = disambiguate.split(samples)
            with profile.report("alignment", dirs):
                samples = run_parallel("process_alignment", samples)
                samples = alignprep.merge_split_alignments(samples, run_parallel)
                samples = disambiguate.resolve(samples, run_parallel)
            with profile.report("callable regions", dirs):
                samples = run_parallel("postprocess_alignment", samples)
                regions = run_parallel("combine_sample_regions", [samples])[0]
                samples = region.add_region_info(samples, regions)
                samples = region.clean_sample_data(samples)
            with profile.report("coverage", dirs):
                samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with prun.start(_wres(parallel, ["gatk", "picard", "variantcaller"]),
                        samples, config, dirs, "full",
                        multiplier=len(regions["analysis"]), max_multicore=1) as run_parallel:
            with profile.report("alignment post-processing", dirs):
                samples = region.parallel_prep_region(samples, regions, run_parallel)
            with profile.report("variant calling", dirs):
                samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with prun.start(_wres(parallel, ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"]),
                        samples, config, dirs, "persample") as run_parallel:
            with profile.report("variant post-processing", dirs):
                samples = run_parallel("postprocess_variants", samples)
            with profile.report("validation", dirs):
                samples = run_parallel("compare_to_rm", samples)
                samples = combine_multiple_callers(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with prun.start(_wres(parallel, ["gemini", "samtools", "fastqc", "bamtools", "bcbio_variation",
                                         "bcbio-variation-recall"]),
                        samples, config, dirs, "multicore2") as run_parallel:
            with profile.report("prepped BAM merging", dirs):
                samples = region.delayed_bamprep_merge(samples, run_parallel)
            with profile.report("ensemble calling", dirs):
                samples = ensemble.combine_calls_parallel(samples, run_parallel)
                samples = validate.summarize_grading(samples)
            with profile.report("structural variation", dirs):
                samples = structural.run(samples, run_parallel)
            with profile.report("population database", dirs):
                samples = population.prep_db_parallel(samples, run_parallel)
            with profile.report("quality control", dirs):
                samples = qcsummary.generate_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Ejemplo n.º 4
0
    def run(self, config, config_file, run_parallel, parallel, dirs, samples):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["process_alignment", "postprocess_alignment"],
                             samples, dirs, config,
                             multiplier=alignprep.parallel_multiplier(samples)) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel("prep_align_inputs", samples)
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = alignprep.merge_split_alignments(samples, run_parallel)
            samples = disambiguate.resolve(samples, run_parallel)
            samples = run_parallel("postprocess_alignment", samples)
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with global_parallel(parallel, "full", ["piped_bamprep", "variantcall_sample"],
                             samples, dirs, config,
                             multiplier=len(regions["analysis"]), max_multicore=1) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions, run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with global_parallel(parallel, "multicore2", ["prep_gemini_db", "delayed_bam_merge"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: structural variation")
            samples = structural.run(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Ejemplo n.º 5
0
    def run(self, config, config_file, run_parallel, parallel, dirs,
            lane_items):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["align_prep_full"],
                             lane_items, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel(
                "align_prep_full",
                [list(x) + [config_file] for x in lane_items])
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with global_parallel(parallel,
                             "full", ["piped_bamprep", "variantcall_sample"],
                             samples,
                             dirs,
                             config,
                             multiplier=len(regions["analysis"])) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions,
                                                  run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        ## Finalizing BAMs and population databases, handle multicore computation
        with global_parallel(parallel, "multicore2",
                             ["prep_gemini_db", "delayed_bam_merge"], samples,
                             dirs, config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples
Ejemplo n.º 6
0
    def run(self, config, config_file, run_parallel, parallel, dirs,
            lane_items):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with global_parallel(parallel, "multicore", ["align_prep_full"],
                             lane_items, dirs["work"], config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment")
            samples = run_parallel(
                "align_prep_full",
                [list(x) + [config_file] for x in lane_items])
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)

        ## Variant calling on sub-regions of the input file (full cluster)
        with global_parallel(
                parallel,
                "full", ["piped_bamprep", "variantcall_sample"],
                samples,
                dirs["work"],
                config,
                multiplier=len(regions["analysis"])) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions,
                                                  run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with global_parallel(parallel, "persample", ["postprocess_variants"],
                             samples, dirs["work"], config) as parallel:
            run_parallel = parallel_runner(parallel, dirs, config)
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            samples = combine_multiple_callers(samples)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = validate.summarize_grading(samples)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
            logger.info("Timing: finished")
        return samples
Ejemplo n.º 7
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     # Handle alignment and preparation requiring the entire input file
     samples = run_parallel("align_prep_full", (list(x) + [config_file] for x in lane_items))
     regions = callable.combine_sample_regions(samples)
     samples = region.add_region_info(samples, regions)
     # Handle all variant calling on sub-regions of the input file
     samples = region.clean_sample_data(samples)
     samples = region.parallel_prep_region(samples, regions, run_parallel)
     samples = region.parallel_variantcall_region(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = population.prep_db_parallel(samples, run_parallel)
     samples = region.delayed_bamprep_merge(samples, run_parallel)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     samples = validate.summarize_grading(samples)
     return samples
Ejemplo n.º 8
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     # Handle alignment and preparation requiring the entire input file
     samples = run_parallel("align_prep_full",
                            (list(x) + [config_file] for x in lane_items))
     regions = callable.combine_sample_regions(samples)
     samples = region.add_region_info(samples, regions)
     # Handle all variant calling on sub-regions of the input file
     samples = region.clean_sample_data(samples)
     samples = region.parallel_prep_region(samples, regions, run_parallel)
     samples = region.parallel_variantcall_region(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = population.prep_db_parallel(samples, run_parallel)
     samples = region.delayed_bamprep_merge(samples, run_parallel)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     samples = validate.summarize_grading(samples)
     return samples
Ejemplo n.º 9
0
    def run(self, config, config_file, parallel, dirs, samples):
        ## Alignment and preparation requiring the entire input file (multicore cluster)
        with prun.start(_wres(
                parallel, ["aligner", "gatk"],
            (["reference", "fasta"], ["reference", "aligner"], ["files"])),
                        samples,
                        config,
                        dirs,
                        "multicore",
                        multiplier=alignprep.parallel_multiplier(
                            samples)) as run_parallel:
            logger.info("Timing: alignment")
            samples = run_parallel("prep_align_inputs", samples)
            samples = disambiguate.split(samples)
            samples = run_parallel("process_alignment", samples)
            samples = alignprep.merge_split_alignments(samples, run_parallel)
            samples = disambiguate.resolve(samples, run_parallel)
            samples = run_parallel("postprocess_alignment", samples)
            regions = callable.combine_sample_regions(samples)
            samples = region.add_region_info(samples, regions)
            samples = region.clean_sample_data(samples)
            logger.info("Timing: coverage")
            samples = coverage.summarize_samples(samples, run_parallel)

        ## Variant calling on sub-regions of the input file (full cluster)
        with prun.start(_wres(parallel, ["gatk", "picard", "variantcaller"]),
                        samples,
                        config,
                        dirs,
                        "full",
                        multiplier=len(regions["analysis"]),
                        max_multicore=1) as run_parallel:
            logger.info("Timing: alignment post-processing")
            samples = region.parallel_prep_region(samples, regions,
                                                  run_parallel)
            logger.info("Timing: variant calling")
            samples = region.parallel_variantcall_region(samples, run_parallel)

        ## Finalize variants (per-sample cluster)
        with prun.start(
                _wres(parallel,
                      ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"]),
                samples, config, dirs, "persample") as run_parallel:
            logger.info("Timing: variant post-processing")
            samples = run_parallel("postprocess_variants", samples)
            logger.info("Timing: validation")
            samples = run_parallel("compare_to_rm", samples)
            samples = combine_multiple_callers(samples)
        ## Finalizing BAMs and population databases, handle multicore computation
        with prun.start(
                _wres(parallel, [
                    "gemini", "samtools", "fastqc", "bamtools",
                    "bcbio_variation", "bcbio-variation-recall"
                ]), samples, config, dirs, "multicore2") as run_parallel:
            logger.info("Timing: prepped BAM merging")
            samples = region.delayed_bamprep_merge(samples, run_parallel)
            logger.info("Timing: ensemble calling")
            samples = ensemble.combine_calls_parallel(samples, run_parallel)
            samples = validate.summarize_grading(samples)
            logger.info("Timing: structural variation")
            samples = structural.run(samples, run_parallel)
            logger.info("Timing: population database")
            samples = population.prep_db_parallel(samples, run_parallel)
            logger.info("Timing: quality control")
            samples = qcsummary.generate_parallel(samples, run_parallel)
        logger.info("Timing: finished")
        return samples