# NOTE(review): a second definition with the same name and signature follows
# later in this file; at import time the later one rebinds the name.
def combine_variant_files(orig_files, out_file, ref_file, config,
                          quiet_out=True, region=None):
    """Combine VCF files from the same sample into a single output file.

    Used when calling was split (for example into SNPs and indels) and the
    pieces need to be merged back into one final file. The merge itself is
    delegated to ``picard MergeVcfs``.

    Args:
        orig_files: Either an iterable of VCF paths, or a dict from which the
            paths are looked up under the key named by ``config["file_key"]``.
        out_file: Path of the merged output. When it ends with ``.gz``,
            ``bgzip_and_index`` is called on it before returning.
        ref_file: Reference file; the sequence dictionary handed to Picard is
            ``utils.splitext_plus(ref_file)[0]`` with ``.dict`` appended.
        config: Configuration dictionary forwarded to the helper calls.
        quiet_out: Not read by this function; kept for interface
            compatibility.
        region: Passed through unchanged in the dict-style return value.

    Returns:
        ``out_file`` when ``orig_files`` was not a dict. Otherwise a
        one-element list holding a dict with the file key mapped to
        ``out_file`` plus ``"region"``, ``"sam_ref"`` and ``"config"``.
    """
    # A dict input selects the pipeline calling convention: the real file
    # list sits under a configurable key and the return value mirrors it.
    called_from_pipeline = isinstance(orig_files, dict)
    if called_from_pipeline:
        key = config["file_key"]
        orig_files = orig_files[key]

    # Only run the merge when utils.file_exists reports no usable output.
    if not utils.file_exists(out_file):
        with file_transaction(config, out_file) as tx_out_file:
            # Inputs that are missing on disk are dropped.
            present = [path for path in orig_files if os.path.exists(path)]
            job_args = [[path, config] for path in present]
            indexed = run_multicore(p_bgzip_and_index, job_args, config)

            seq_dict = "%s.dict" % utils.splitext_plus(ref_file)[0]

            # With more than one core a memory-scaling hint is handed to
            # broad.get_picard_opts; otherwise no hint is given.
            num_cores = dd.get_num_cores({"config": config})
            if num_cores > 1:
                memscale = {"magnitude": 0.9 * num_cores, "direction": "increase"}
            else:
                memscale = None

            picard_args = ["picard"] + broad.get_picard_opts(config, memscale)
            picard_args.extend(["MergeVcfs", "D=%s" % seq_dict, "O=%s" % tx_out_file])
            picard_args.extend(["I=%s" % vcf for vcf in indexed])

            java_prep = utils.get_java_clprep()
            shell_cmd = "%s && %s" % (java_prep, " ".join(picard_args))
            do.run(shell_cmd, "Combine variant files")

    if out_file.endswith(".gz"):
        bgzip_and_index(out_file, config)

    if not called_from_pipeline:
        return out_file
    return [{key: out_file, "region": region, "sam_ref": ref_file, "config": config}]
# NOTE(review): a definition with the same name and signature also appears
# earlier in this file; this later definition is the one bound at import time.
def combine_variant_files(orig_files, out_file, ref_file, config,
                          quiet_out=True, region=None):
    """Combine VCF files from the same sample into a single output file.

    Used when calling was split (for example into SNPs and indels) and the
    pieces need to be merged back into one final file. The merge itself is
    delegated to ``picard MergeVcfs``.

    Args:
        orig_files: Either an iterable of VCF paths, or a dict from which the
            paths are looked up under the key named by ``config["file_key"]``.
        out_file: Path of the merged output. When it ends with ``.gz``,
            ``bgzip_and_index`` is called on it before returning.
        ref_file: Reference file; the sequence dictionary handed to Picard is
            ``utils.splitext_plus(ref_file)[0]`` with ``.dict`` appended.
        config: Configuration dictionary forwarded to the helper calls.
        quiet_out: Not read by this function; kept for interface
            compatibility.
        region: Passed through unchanged in the dict-style return value.

    Returns:
        ``out_file`` when ``orig_files`` was not a dict. Otherwise a
        one-element list holding a dict with the file key mapped to
        ``out_file`` plus ``"region"``, ``"sam_ref"`` and ``"config"``.
    """
    # A dict input selects the pipeline calling convention: the real file
    # list sits under a configurable key and the return value mirrors it.
    called_from_pipeline = isinstance(orig_files, dict)
    if called_from_pipeline:
        key = config["file_key"]
        orig_files = orig_files[key]

    # Only run the merge when utils.file_exists reports no usable output.
    if not utils.file_exists(out_file):
        with file_transaction(config, out_file) as tx_out_file:
            # Inputs that are missing on disk are dropped.
            present = [path for path in orig_files if os.path.exists(path)]
            job_args = [[path, config] for path in present]
            indexed = run_multicore(p_bgzip_and_index, job_args, config)

            seq_dict = "%s.dict" % utils.splitext_plus(ref_file)[0]

            # With more than one core a memory-scaling hint is handed to
            # broad.get_picard_opts; otherwise no hint is given.
            num_cores = dd.get_num_cores({"config": config})
            if num_cores > 1:
                memscale = {"magnitude": 0.9 * num_cores, "direction": "increase"}
            else:
                memscale = None

            picard_args = ["picard"] + broad.get_picard_opts(config, memscale)
            picard_args.extend(["MergeVcfs", "D=%s" % seq_dict, "O=%s" % tx_out_file])
            picard_args.extend(["I=%s" % vcf for vcf in indexed])

            java_prep = utils.get_java_clprep()
            shell_cmd = "%s && %s" % (java_prep, " ".join(picard_args))
            do.run(shell_cmd, "Combine variant files")

    if out_file.endswith(".gz"):
        bgzip_and_index(out_file, config)

    if not called_from_pipeline:
        return out_file
    return [{key: out_file, "region": region, "sam_ref": ref_file, "config": config}]