Beispiel #1
0
def run_main(config,
             config_file,
             work_dir,
             parallel,
             fc_dir=None,
             run_info_yaml=None):
    """Run toplevel analysis, processing a set of input files.

    config_file -- Main YAML configuration file with system parameters
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    setup_logging(config)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(
        get_fastq_dir(fc_dir) if fc_dir else None, config, config_file)
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {
        "fastq": fastq_dir,
        "galaxy": galaxy_dir,
        "work": work_dir,
        "flowcell": fc_dir,
        "config": config_dir
    }
    config = _set_resources(parallel, config)
    run_parallel = parallel_runner(parallel, dirs, config, config_file)

    # process each flowcell lane
    run_items = add_multiplex_across_lanes(run_info["details"], dirs["fastq"],
                                           fc_name)
    lanes = ((info, fc_name, fc_date, dirs, config) for info in run_items)
    lane_items = run_parallel("process_lane", lanes)
    align_items = run_parallel("process_alignment", lane_items)
    # process samples, potentially multiplexed across multiple lanes
    samples = organize_samples(align_items, dirs, config_file)
    samples = run_parallel("merge_sample", samples)
    samples = run_parallel("prep_recal", samples)
    samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
    samples = parallel_realign_sample(samples, run_parallel)
    samples = parallel_variantcall(samples, run_parallel)
    samples = run_parallel("postprocess_variants", samples)
    samples = combine_multiple_callers(samples)
    samples = run_parallel("detect_sv", samples)
    samples = run_parallel("combine_calls", samples)
    run_parallel("process_sample", samples)
    run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
    write_project_summary(samples)
    write_metrics(run_info, fc_name, fc_date, dirs)
Beispiel #2
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("prep_recal", samples)
     samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
     samples = parallel_realign_sample(samples, run_parallel)
     samples = parallel_variantcall(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = run_parallel("detect_sv", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     return samples
Beispiel #3
0
 def run(self, config, config_file, run_parallel, dirs, lane_items):
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("prep_recal", samples)
     samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
     samples = parallel_realign_sample(samples, run_parallel)
     samples = parallel_variantcall(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = run_parallel("detect_sv", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     return samples
Beispiel #4
0
 def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
     raise NotImplementedError("`variant` processing is deprecated: please use `variant2`"
                               "The next version will alias variant to the new variant2 pipeline")
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("prep_recal", samples)
     samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
     samples = parallel_realign_sample(samples, run_parallel)
     samples = parallel_variantcall(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = run_parallel("detect_sv", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     return samples
Beispiel #5
0
 def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
     raise NotImplementedError("`variant` processing is deprecated: please use `variant2`"
                               "The next version will alias variant to the new variant2 pipeline")
     lane_items = run_parallel("trim_lane", lane_items)
     align_items = run_parallel("process_alignment", lane_items)
     # process samples, potentially multiplexed across multiple lanes
     samples = organize_samples(align_items, dirs, config_file)
     samples = run_parallel("merge_sample", samples)
     samples = run_parallel("prep_recal", samples)
     samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
     samples = parallel_realign_sample(samples, run_parallel)
     samples = parallel_variantcall(samples, run_parallel)
     samples = run_parallel("postprocess_variants", samples)
     samples = combine_multiple_callers(samples)
     samples = ensemble.combine_calls_parallel(samples, run_parallel)
     samples = run_parallel("detect_sv", samples)
     samples = qcsummary.generate_parallel(samples, run_parallel)
     run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
     return samples
Beispiel #6
0
def run_main(config, config_file, work_dir, parallel,
             fc_dir=None, run_info_yaml=None):
    """Run toplevel analysis, processing a set of input files.

    config_file -- Main YAML configuration file with system parameters
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    setup_logging(config)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(get_fastq_dir(fc_dir) if fc_dir else None,
                                                        config, config_file)
    config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {"fastq": fastq_dir, "galaxy": galaxy_dir,
            "work": work_dir, "flowcell": fc_dir, "config": config_dir}
    config = _set_resources(parallel, config)
    run_parallel = parallel_runner(parallel, dirs, config, config_file)

    # process each flowcell lane
    run_items = add_multiplex_across_lanes(run_info["details"], dirs["fastq"], fc_name)
    lanes = ((info, fc_name, fc_date, dirs, config) for info in run_items)
    lane_items = run_parallel("process_lane", lanes)
    align_items = run_parallel("process_alignment", lane_items)
    # process samples, potentially multiplexed across multiple lanes
    samples = organize_samples(align_items, dirs, config_file)
    samples = run_parallel("merge_sample", samples)
    samples = run_parallel("prep_recal", samples)
    samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
    samples = parallel_realign_sample(samples, run_parallel)
    samples = parallel_variantcall(samples, run_parallel)
    samples = run_parallel("postprocess_variants", samples)
    samples = combine_multiple_callers(samples)
    samples = run_parallel("detect_sv", samples)
    samples = run_parallel("combine_calls", samples)
    run_parallel("process_sample", samples)
    run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
    write_project_summary(samples)
    write_metrics(run_info, fc_name, fc_date, dirs)