def parallel_callable_loci(in_bam, ref_file, data):
    """Compute callable regions for a BAM file, parallelized per chromosome.

    Runs ``calc_callable_loci`` on each chromosome via a local parallel
    runner, then merges the per-chromosome BED outputs into a single
    callable BED file whose path is returned.
    """
    run_config = copy.deepcopy(data["config"])
    cores = run_config["algorithm"].get("num_cores", 1)
    # Per-sample output directory under the alignment work area.
    out_dir = utils.safe_makedir(
        os.path.join(dd.get_work_dir(data), "align", dd.get_sample_name(data)))
    work_item = {"work_bam": in_bam,
                 "config": run_config,
                 "reference": data["reference"],
                 "dirs": {"out": out_dir}}
    parallel = {"type": "local", "cores": cores, "module": "bcbio.distributed"}
    items = [[work_item]]
    with prun.start(parallel, items, run_config, multiplier=int(cores)) as runner:
        # Split into one -callable.bed job per chromosome, dropping alt contigs.
        chrom_split = shared.process_bam_by_chromosome("-callable.bed", "work_bam",
                                                       remove_alts=True)
        out = parallel_split_combine(items, chrom_split, runner,
                                     "calc_callable_loci", "combine_bed",
                                     "callable_bed", ["config"])[0]
    return out[0]["callable_bed"]
def parallel_callable_loci(in_bam, ref_file, config):
    """Identify callable regions in a BAM file, split by chromosome.

    Builds a minimal data item carrying the reference FASTA, runs
    ``calc_callable_loci`` per chromosome on a local runner, and combines
    the per-chromosome results. Returns the merged callable BED path.
    """
    cores = config["algorithm"].get("num_cores", 1)
    # Work on a private copy so per-run tweaks never leak to the caller.
    config = copy.deepcopy(config)
    item = {"work_bam": in_bam,
            "config": config,
            "reference": {"fasta": {"base": ref_file}}}
    parallel = {"type": "local", "cores": cores, "module": "bcbio.distributed"}
    items = [[item]]
    with prun.start(parallel, items, config, multiplier=int(cores)) as runner:
        splitter = shared.process_bam_by_chromosome("-callable.bed", "work_bam")
        out = parallel_split_combine(items, splitter, runner,
                                     "calc_callable_loci", "combine_bed",
                                     "callable_bed", ["config"])[0]
    return out[0]["callable_bed"]
def parallel_callable_loci(in_bam, ref_file, config):
    """Determine callable loci for a BAM, parallelized over chromosomes.

    Uses the older ``parallel_runner`` interface with a local runner and
    returns the path to the combined callable BED file.
    """
    cores = config["algorithm"].get("num_cores", 1)
    item = {"work_bam": in_bam, "sam_ref": ref_file, "config": config}
    parallel = {"type": "local", "cores": cores, "module": "bcbio.distributed"}
    runner = parallel_runner(parallel, {}, config)
    splitter = shared.process_bam_by_chromosome("-callable.bed", "work_bam")
    results = parallel_split_combine([[item]], splitter, runner,
                                     "calc_callable_loci", "combine_bed",
                                     "callable_bed", ["config"])[0]
    return results[0]["callable_bed"]
def parallel_callable_loci(in_bam, ref_file, data):
    """Compute callable regions from a BAM file in per-chromosome chunks.

    Carries the existing reference information from ``data`` into a fresh
    work item, runs ``calc_callable_loci`` on a local parallel runner and
    combines the resulting BEDs. Returns the combined callable BED path.
    """
    run_config = copy.deepcopy(data["config"])
    cores = run_config["algorithm"].get("num_cores", 1)
    work_item = {"work_bam": in_bam,
                 "config": run_config,
                 "reference": data["reference"]}
    parallel = {"type": "local", "cores": cores, "module": "bcbio.distributed"}
    items = [[work_item]]
    with prun.start(parallel, items, run_config, multiplier=int(cores)) as runner:
        # One -callable.bed job per chromosome; alternative contigs removed.
        chrom_split = shared.process_bam_by_chromosome("-callable.bed", "work_bam",
                                                       remove_alts=True)
        out = parallel_split_combine(items, chrom_split, runner,
                                     "calc_callable_loci", "combine_bed",
                                     "callable_bed", ["config"])[0]
    return out[0]["callable_bed"]
def parallel_callable_loci(in_bam, ref_file, config):
    """Identify callable loci from a BAM, running chromosomes in parallel.

    Deep-copies the configuration and lowers the per-job memory request
    before fanning out, since multiple chromosome jobs run concurrently.
    Returns the path to the combined callable BED file.
    """
    cores = config["algorithm"].get("num_cores", 1)
    config = copy.deepcopy(config)
    # Halve memory per process: jobs run side by side on the local machine.
    config["algorithm"]["memory_adjust"] = {"direction": "decrease",
                                            "magnitude": 2}
    item = {"work_bam": in_bam, "sam_ref": ref_file, "config": config}
    parallel = {"type": "local", "cores": cores, "module": "bcbio.distributed"}
    items = [[item]]
    with prun.start(parallel, items, config) as runner:
        splitter = shared.process_bam_by_chromosome("-callable.bed", "work_bam")
        out = parallel_split_combine(items, splitter, runner,
                                     "calc_callable_loci", "combine_bed",
                                     "callable_bed", ["config"])[0]
    return out[0]["callable_bed"]
def parallel_callable_loci(in_bam, ref_file, config):
    """Compute callable regions per chromosome and merge into one BED.

    Works on a deep copy of the configuration with a reduced memory
    request per job, and scales the runner by the core count. Returns
    the combined callable BED file path.
    """
    cores = config["algorithm"].get("num_cores", 1)
    config = copy.deepcopy(config)
    # Lower per-job memory: several chromosome jobs share the machine.
    config["algorithm"]["memory_adjust"] = {"direction": "decrease",
                                            "magnitude": 2}
    work_item = {"work_bam": in_bam, "sam_ref": ref_file, "config": config}
    parallel = {"type": "local", "cores": cores, "module": "bcbio.distributed"}
    items = [[work_item]]
    with prun.start(parallel, items, config, multiplier=int(cores)) as runner:
        chrom_split = shared.process_bam_by_chromosome("-callable.bed", "work_bam")
        out = parallel_split_combine(items, chrom_split, runner,
                                     "calc_callable_loci", "combine_bed",
                                     "callable_bed", ["config"])[0]
    return out[0]["callable_bed"]
def parallel_callable_loci(in_bam, ref_file, config):
    """Find callable loci in a BAM file, processing chromosomes in parallel.

    Dispatches ``calc_callable_loci`` per chromosome through a local
    ``parallel_runner`` and returns the combined callable BED path.
    """
    cores = config["algorithm"].get("num_cores", 1)
    work_item = {"work_bam": in_bam, "sam_ref": ref_file, "config": config}
    runner = parallel_runner({"type": "local",
                              "cores": cores,
                              "module": "bcbio.distributed"},
                             {}, config)
    chrom_split = shared.process_bam_by_chromosome("-callable.bed", "work_bam")
    combined = parallel_split_combine([[work_item]], chrom_split, runner,
                                      "calc_callable_loci", "combine_bed",
                                      "callable_bed", ["config"])[0]
    return combined[0]["callable_bed"]
def parallel_realign_sample(sample_info, parallel_fn):
    """Realign samples, running in parallel over individual chromosomes.

    Samples with realignment disabled in their configuration pass through
    untouched; the rest are split per chromosome, realigned, and their
    BAMs recombined. Returns the full list of finished samples.
    """
    to_process, finished = [], []
    for sample in sample_info:
        wants_realign = sample[0]["config"]["algorithm"].get("realign", True)
        (to_process if wants_realign else finished).append(sample)
    if to_process:
        bam_key = "work_bam"
        splitter = process_bam_by_chromosome("-realign.bam", bam_key,
                                             default_targets=["nochr"])
        realigned = parallel_split_combine(to_process, splitter, parallel_fn,
                                           "realign_sample", "combine_bam",
                                           bam_key, ["config"])
        finished.extend(realigned)
    return finished
def parallel_variantcall(sample_info, parallel_fn):
    """Provide sample genotyping, running in parallel over individual
    chromosomes.

    Samples with SNP calling enabled are expanded across their configured
    variant callers and split per chromosome; others pass straight through.
    Returns the combined list of processed and skipped samples.
    """
    to_process = []
    finished = []
    for sample in sample_info:
        if sample[0]["config"]["algorithm"]["snpcall"]:
            # One work item per configured variant caller.
            to_process.extend(_handle_multiple_variantcallers(sample))
        else:
            finished.append(sample)
    if to_process:
        splitter = process_bam_by_chromosome("-variants.vcf", "work_bam",
                                             dir_ext_fn=_get_variantcaller)
        called = parallel_split_combine(to_process, splitter, parallel_fn,
                                        "variantcall_sample",
                                        "combine_variant_files",
                                        "vrn_file", ["sam_ref", "config"])
        finished.extend(called)
    return finished
def parallel_variantcall(sample_info, parallel_fn):
    """Provide sample genotyping, running in parallel over individual
    chromosomes.

    Samples with a configured variant caller are expanded across callers,
    grouped into batches, split per chromosome and recombined; samples
    without one pass through unchanged.
    """
    finished = []
    to_process = []
    for sample in sample_info:
        if get_variantcaller(sample[0]):
            to_process.extend(handle_multiple_variantcallers(sample))
        else:
            finished.append(sample)
    if to_process:
        splitter = process_bam_by_chromosome("-variants.vcf", "work_bam",
                                             dir_ext_fn=get_variantcaller)
        # Batched processing keeps related samples (e.g. tumor/normal) together.
        called = grouped_parallel_split_combine(
            to_process, splitter, multi.group_batches, parallel_fn,
            "variantcall_sample", "split_variants_by_sample",
            "combine_variant_files", "vrn_file", ["sam_ref", "config"])
        finished.extend(called)
    return finished
def parallel_variantcall(sample_info, parallel_fn):
    """Provide sample genotyping, running in parallel over individual
    chromosomes.

    Expands each sample with a configured variant caller into per-caller
    work items, groups them into batches, calls variants per chromosome
    into bgzipped VCFs, and recombines. Unconfigured samples pass through.
    """
    finished = []
    pending = []
    for sample in sample_info:
        if get_variantcaller(sample[0]):
            pending.extend(handle_multiple_variantcallers(sample))
        else:
            finished.append(sample)
    if pending:
        splitter = process_bam_by_chromosome("-variants.vcf.gz", "work_bam",
                                             dir_ext_fn=get_variantcaller)
        # Grouped combine keeps batched samples (e.g. paired analyses) together.
        processed = grouped_parallel_split_combine(
            pending, splitter, multi.group_batches, parallel_fn,
            "variantcall_sample", "split_variants_by_sample",
            "combine_variant_files", "vrn_file", ["sam_ref", "config"])
        finished.extend(processed)
    return finished
def parallel_realign_sample(sample_info, parallel_fn):
    """Realign samples, running in parallel over individual chromosomes.

    Partitions the input into samples that request realignment (the
    default) and those that skip it, realigns the former per chromosome,
    recombines the BAMs, and returns every sample.
    """
    skipped = []
    needs_realign = []
    for sample in sample_info:
        if sample[0]["config"]["algorithm"].get("realign", True):
            needs_realign.append(sample)
        else:
            skipped.append(sample)
    if not needs_realign:
        return skipped
    bam_key = "work_bam"
    chrom_split = process_bam_by_chromosome("-realign.bam", bam_key,
                                            default_targets=["nochr"])
    realigned = parallel_split_combine(needs_realign, chrom_split, parallel_fn,
                                       "realign_sample", "combine_bam",
                                       bam_key, ["config"])
    return skipped + realigned
def parallel_write_recal_bam(xs, parallel_fn):
    """Rewrite a recalibrated BAM file in parallel, working off each
    chromosome.

    Samples with recalibration disabled pass through unchanged; the rest
    are split per chromosome, rewritten, and recombined into a single BAM.
    """
    to_process, finished = [], []
    for item in xs:
        wants_recal = item[0]["config"]["algorithm"].get("recalibrate", True)
        (to_process if wants_recal else finished).append(item)
    if to_process:
        bam_key = "work_bam"
        splitter = process_bam_by_chromosome("-gatkrecal.bam", bam_key,
                                             default_targets=["nochr"])
        recalibrated = parallel_split_combine(to_process, splitter, parallel_fn,
                                              "write_recal_bam", "combine_bam",
                                              bam_key, ["config"])
        finished.extend(recalibrated)
    # Save diskspace from original to recalibrated
    #save_diskspace(data["work_bam"], "Recalibrated to %s" % recal_bam,
    #               data["config"])
    return finished
def parallel_write_recal_bam(xs, parallel_fn):
    """Rewrite a recalibrated BAM file in parallel, working off each
    chromosome.

    Splits each sample needing recalibration into per-chromosome jobs,
    rewrites the recalibrated reads, and merges them back into one BAM.
    Samples that opted out of recalibration are returned as-is.
    """
    pending = []
    done = []
    for item in xs:
        if item[0]["config"]["algorithm"].get("recalibrate", True):
            pending.append(item)
        else:
            done.append(item)
    if pending:
        bam_key = "work_bam"
        chrom_split = process_bam_by_chromosome("-gatkrecal.bam", bam_key,
                                                default_targets=["nochr"])
        done.extend(parallel_split_combine(pending, chrom_split, parallel_fn,
                                           "write_recal_bam", "combine_bam",
                                           bam_key, ["config"]))
    # Save diskspace from original to recalibrated
    #save_diskspace(data["work_bam"], "Recalibrated to %s" % recal_bam,
    #               data["config"])
    return done