Ejemplo n.º 1
0
def combine_variant_files(orig_files, out_file, ref_file, config,
                          quiet_out=True, region=None):
    """Merge VCF files for one sample into a single file via Picard MergeVcfs.

    Intended for re-joining files that were split (for example into SNPs and
    indels) for separate processing.

    ``orig_files`` is either an iterable of VCF paths or a dictionary. For a
    dictionary, the paths are read from the entry named by
    ``config["file_key"]`` and the result is a one-element list containing a
    dictionary with the output path (under that same key), ``region``,
    ``sam_ref`` and ``config``. For any other input the output path itself is
    returned.

    The merge is skipped when ``utils.file_exists(out_file)`` is already true,
    and inputs missing from disk are left out of the merge. ``quiet_out`` is
    accepted but not used by this function.
    """
    from_pipeline = isinstance(orig_files, dict)
    if from_pipeline:
        file_key = config["file_key"]
        orig_files = orig_files[file_key]

    if not utils.file_exists(out_file):
        with file_transaction(config, out_file) as tx_out_file:
            # Inputs present on disk go through p_bgzip_and_index before merging.
            index_args = [[vcf, config] for vcf in orig_files if os.path.exists(vcf)]
            ready_files = run_multicore(p_bgzip_and_index, index_args, config)

            # Sequence dictionary path: reference path minus extension, plus ".dict".
            dict_file = "%s.dict" % utils.splitext_plus(ref_file)[0]

            # With more than one core, request a memory increase scaled by core count.
            cores = dd.get_num_cores({"config": config})
            if cores > 1:
                memscale = {"magnitude": 0.9 * cores, "direction": "increase"}
            else:
                memscale = None

            picard_args = ["picard"] + broad.get_picard_opts(config, memscale)
            picard_args += ["MergeVcfs", "D=%s" % dict_file, "O=%s" % tx_out_file]
            picard_args += ["I=%s" % vcf for vcf in ready_files]

            java_prep = utils.get_java_clprep()
            do.run("%s && %s" % (java_prep, " ".join(picard_args)),
                   "Combine variant files")

    if out_file.endswith(".gz"):
        bgzip_and_index(out_file, config)

    if not from_pipeline:
        return out_file
    return [{file_key: out_file, "region": region, "sam_ref": ref_file, "config": config}]
Ejemplo n.º 2
0
def combine_variant_files(orig_files, out_file, ref_file, config,
                          quiet_out=True, region=None):
    """Merge VCF files for one sample into a single file via Picard MergeVcfs.

    Intended for re-joining files that were split (for example into SNPs and
    indels) for separate processing.

    ``orig_files`` is either an iterable of VCF paths or a dictionary. For a
    dictionary, the paths are read from the entry named by
    ``config["file_key"]`` and the result is a one-element list containing a
    dictionary with the output path (under that same key), ``region``,
    ``sam_ref`` and ``config``. For any other input the output path itself is
    returned.

    The merge is skipped when ``utils.file_exists(out_file)`` is already true,
    and inputs missing from disk are left out of the merge. ``quiet_out`` is
    accepted but not used by this function.
    """
    from_pipeline = isinstance(orig_files, dict)
    if from_pipeline:
        file_key = config["file_key"]
        orig_files = orig_files[file_key]

    if not utils.file_exists(out_file):
        with file_transaction(config, out_file) as tx_out_file:
            # Inputs present on disk go through p_bgzip_and_index before merging.
            index_args = [[vcf, config] for vcf in orig_files if os.path.exists(vcf)]
            ready_files = run_multicore(p_bgzip_and_index, index_args, config)

            # Sequence dictionary path: reference path minus extension, plus ".dict".
            dict_file = "%s.dict" % utils.splitext_plus(ref_file)[0]

            # With more than one core, request a memory increase scaled by core count.
            cores = dd.get_num_cores({"config": config})
            if cores > 1:
                memscale = {"magnitude": 0.9 * cores, "direction": "increase"}
            else:
                memscale = None

            picard_args = ["picard"] + broad.get_picard_opts(config, memscale)
            picard_args += ["MergeVcfs", "D=%s" % dict_file, "O=%s" % tx_out_file]
            picard_args += ["I=%s" % vcf for vcf in ready_files]

            java_prep = utils.get_java_clprep()
            do.run("%s && %s" % (java_prep, " ".join(picard_args)),
                   "Combine variant files")

    if out_file.endswith(".gz"):
        bgzip_and_index(out_file, config)

    if not from_pipeline:
        return out_file
    return [{file_key: out_file, "region": region, "sam_ref": ref_file, "config": config}]