def _prep_grabix_indexes(in_files, dirs, config):
    """Prepare bgzipped inputs and run grabix indexing on each of them.

    A lone BAM input (first entry ends in ``.bam`` and the second entry is
    ``None``) is handed to ``_bgzip_from_bam``; otherwise every non-empty
    entry of ``in_files`` goes through ``_bgzip_from_fastq`` and empty
    entries are kept as ``None`` placeholders.

    Every truthy prepared file is then passed to ``_grabix_index`` through
    ``run_multicore``, using ``config["algorithm"]["num_cores"]`` (default 1)
    as the final argument.

    Returns the prepared files exactly as produced by the bgzip helpers.
    """
    single_bam = in_files[0].endswith(".bam") and in_files[1] is None
    if single_bam:
        bgzipped = _bgzip_from_bam(in_files[0], dirs, config)
    else:
        bgzipped = []
        for fname in in_files:
            # Keep positional placeholders for missing inputs.
            bgzipped.append(_bgzip_from_fastq(fname, dirs, config) if fname else None)

    # One single-element argument list per file; each job gets its own
    # independent copy of the configuration.
    index_jobs = []
    for bgzip_file in bgzipped:
        if bgzip_file:
            index_jobs.append([{"bgzip_file": bgzip_file,
                                "config": copy.deepcopy(config)}])

    cores = config["algorithm"].get("num_cores", 1)
    run_multicore(_grabix_index, index_jobs, config, cores)
    return bgzipped
def _do_merge(orig_files, out_file, config, region):
    """Do the actual work of merging with bcftools merge.

    When ``out_file`` does not already exist, every file in ``orig_files`` is
    passed through ``p_bgzip_and_index`` (via ``run_multicore``) and the
    results are merged with ``bcftools merge`` into a transactional output
    file. If ``region`` is truthy the merge is restricted with ``-r``.

    The output format follows the extension of ``out_file``: ``.gz`` produces
    compressed VCF (``z``), anything else plain VCF (``v``). For ``.gz``
    outputs ``bgzip_and_index`` is called on ``out_file`` whether or not the
    merge had to run.

    Returns ``out_file``.
    """
    if not utils.file_exists(out_file):
        with file_transaction(out_file) as tx_out_file:
            indexed_inputs = run_multicore(p_bgzip_and_index,
                                           [[x, config] for x in orig_files],
                                           config)
            # NOTE(review): short_filenames presumably yields shortened aliases
            # of the inputs to keep the command line manageable -- confirm.
            with short_filenames(indexed_inputs) as fs:
                prep_files = " ".join(fs)
                bcftools = config_utils.get_program("bcftools", config)
                output_type = "z" if out_file.endswith(".gz") else "v"
                region_str = "-r {}".format(region) if region else ""
                # In released bcftools, -O/--output-type selects the format
                # while lowercase -o/--output names an output *file*. Using -o
                # here would write to a file called "z"/"v" and leave the
                # redirected transactional output empty.
                cmd = ("{bcftools} merge -O {output_type} {region_str} "
                       "{prep_files} > {tx_out_file}")
                do.run(cmd.format(bcftools=bcftools,
                                  output_type=output_type,
                                  region_str=region_str,
                                  prep_files=prep_files,
                                  tx_out_file=tx_out_file),
                       "Merge variants")
    if out_file.endswith(".gz"):
        bgzip_and_index(out_file, config)
    return out_file
def parallel_combine_variants(orig_files, out_file, ref_file, config, run_parallel):
    """Combine variants in parallel by chromosome, concatenating final outputs.

    The inputs are first passed through ``p_bgzip_and_index``; the work is
    then handed to ``parallel_split_combine`` with one split per contig name
    reported by ``ref.file_contigs``, using the ``"merge_variant_files"`` and
    ``"concat_variant_files"`` step names.

    Per-region outputs are placed in a ``<base>-regions`` directory next to
    ``out_file``. The caller's ``config`` is not modified: a deep copy is
    tagged with ``file_key`` and used throughout.

    Returns ``out_file``.
    """
    file_key = "vcf_files"

    # Work on a private copy; the nested splitter below reads this same
    # (copied) configuration when it is eventually invoked.
    config = copy.deepcopy(config)
    config["file_key"] = file_key

    def split_by_region(data):
        """Return ``(out_file, [per-region argument tuples])``."""
        stem, suffix = utils.splitext_plus(os.path.basename(out_file))
        parent_dir = os.path.dirname(out_file)
        contig_names = [contig.name for contig in ref.file_contigs(ref_file, config)]
        region_args = []
        for name in contig_names:
            region_file = os.path.join(parent_dir,
                                       "%s-regions" % stem,
                                       "%s-%s%s" % (stem, name, suffix))
            utils.safe_makedir(os.path.dirname(region_file))
            region_args.append((region_file, ref_file, config, name))
        return out_file, region_args

    merge_inputs = [[vcf, config] for vcf in orig_files]
    indexed = run_multicore(p_bgzip_and_index, merge_inputs, config)
    work_items = [[{file_key: indexed}]]
    parallel_split_combine(work_items, split_by_region, run_parallel,
                           "merge_variant_files", "concat_variant_files",
                           file_key, ["region", "sam_ref", "config"],
                           split_outfile_i=0)
    return out_file