Example #1
0
def _square_batch_bcbio_variation(data, region, bam_files, vrn_files, out_file,
                                  todo="square"):
    """Run squaring or merging analysis using bcbio.variation.recall.

    Writes the unique, sorted variant files (plus the BAM files when squaring)
    to ``<out_file without extension>-inputs.txt`` and hands that list to the
    ``bcbio-variation-recall`` command line.

    data -- sample dictionary; the reference, core count, jointcaller and
            resource settings are read from it.
    region -- region to process, converted with bamprep.region_to_gatk.
    bam_files -- BAM inputs, only written and used when todo is "square".
    vrn_files -- variant file inputs.
    out_file -- output path passed to the tool.
    todo -- bcbio-variation-recall subcommand to run.

    Returns out_file.
    """
    ref_file = tz.get_in(("reference", "fasta", "base"), data)
    cores = tz.get_in(("config", "algorithm", "num_cores"), data, 1)
    resources = config_utils.get_resources("bcbio-variation-recall", data["config"])
    # adjust memory by cores but leave room for run program memory
    memcores = int(math.ceil(float(cores) / 5.0))
    jvm_opts = config_utils.adjust_opts(resources.get("jvm_opts", ["-Xms250m", "-Xmx2g"]),
                                        {"algorithm": {"memory_adjust": {"direction": "increase",
                                                                         "magnitude": memcores}}})
    is_square = todo == "square"
    # Write unique VCFs and BAMs to input file
    input_file = "%s-inputs.txt" % os.path.splitext(out_file)[0]
    with open(input_file, "w") as out_handle:
        out_handle.write("\n".join(sorted(set(vrn_files))) + "\n")
        if is_square:
            out_handle.write("\n".join(sorted(set(bam_files))) + "\n")
    gatk_region = bamprep.region_to_gatk(region)
    cmd = ["bcbio-variation-recall", todo] + jvm_opts + broad.get_default_jvm_opts() + \
          ["-c", cores, "-r", gatk_region]
    if is_square:
        # The caller is only needed for squaring; looking it up here keeps other
        # subcommands working when no jointcaller is configured.
        variantcaller = tz.get_in(("config", "algorithm", "jointcaller"), data).replace("-joint", "")
        cmd += ["--caller", variantcaller]
    cmd += [out_file, ref_file, input_file]
    bcbio_env = utils.get_bcbio_env()
    # cores may be an integer, so stringify every argument before joining
    cmd = " ".join(str(x) for x in cmd)
    do.run(cmd, "%s in region: %s" % (cmd, gatk_region), env=bcbio_env)
    return out_file
Example #2
0
def _square_batch_bcbio_variation(data, region, bam_files, vrn_files, out_file,
                                  todo="square"):
    """Run squaring or merging analysis using bcbio.variation.recall.

    Writes the unique, sorted variant files (plus the BAM files when squaring)
    to ``<out_file without extension>-inputs.txt`` and hands that list to the
    ``bcbio-variation-recall`` command line.

    data -- sample dictionary; the reference, core count, jointcaller and
            resource settings are read from it.
    region -- region to process, converted with bamprep.region_to_gatk.
    bam_files -- BAM inputs, only written and used when todo is "square".
    vrn_files -- variant file inputs.
    out_file -- output path passed to the tool.
    todo -- bcbio-variation-recall subcommand to run.

    Returns out_file.
    """
    ref_file = tz.get_in(("reference", "fasta", "base"), data)
    cores = tz.get_in(("config", "algorithm", "num_cores"), data, 1)
    resources = config_utils.get_resources("bcbio-variation-recall", data["config"])
    # adjust memory by cores but leave room for run program memory
    memcores = int(math.ceil(float(cores) / 5.0))
    jvm_opts = config_utils.adjust_opts(resources.get("jvm_opts", ["-Xms250m", "-Xmx2g"]),
                                        {"algorithm": {"memory_adjust": {"direction": "increase",
                                                                         "magnitude": memcores}}})
    is_square = todo == "square"
    # Write unique VCFs and BAMs to input file
    input_file = "%s-inputs.txt" % os.path.splitext(out_file)[0]
    with open(input_file, "w") as out_handle:
        out_handle.write("\n".join(sorted(set(vrn_files))) + "\n")
        if is_square:
            out_handle.write("\n".join(sorted(set(bam_files))) + "\n")
    gatk_region = bamprep.region_to_gatk(region)
    cmd = ["bcbio-variation-recall", todo] + jvm_opts + broad.get_default_jvm_opts() + \
          ["-c", cores, "-r", gatk_region]
    if is_square:
        # The caller is only needed for squaring; looking it up here keeps other
        # subcommands working when no jointcaller is configured.
        variantcaller = tz.get_in(("config", "algorithm", "jointcaller"), data).replace("-joint", "")
        cmd += ["--caller", variantcaller]
    cmd += [out_file, ref_file, input_file]
    do.run(cmd, "%s in region: %s" % (cmd, gatk_region))
    return out_file
Example #3
0
def _do_smash_calldiff(truth_vcf, eval_vcf, ref_file, out_dir, config):
    """Compare a truth VCF against an evaluation VCF with SMaSH calldiff.

    JVM options are read from the "smash" resources in config, defaulting to
    -Xms250m/-Xmx2g. out_dir is accepted but not used by this function.
    """
    java_memory = config_utils.get_resources("smash", config).get(
        "jvm_opts", ["-Xms250m", "-Xmx2g"])
    java_defaults = broad.get_default_jvm_opts()
    # truth goes on the left-hand side, the calls being evaluated on the right
    compare_args = ["--lhs_vcf", truth_vcf,
                    "--rhs_vcf", eval_vcf,
                    "--reference_fasta", ref_file,
                    "--presorted"]
    do.run(["smash"] + java_memory + java_defaults + compare_args,
           "Compare files with SMaSH calldiff")
Example #4
0
def _do_smash_calldiff(truth_vcf, eval_vcf, ref_file, out_dir, config):
    """Run the ``smash`` command to diff truth_vcf (lhs) against eval_vcf (rhs).

    Memory settings come from the "smash" entry of the configured resources,
    falling back to -Xms250m/-Xmx2g. The out_dir argument is unused here.
    """
    smash_resources = config_utils.get_resources("smash", config)
    heap_opts = smash_resources.get("jvm_opts", ["-Xms250m", "-Xmx2g"])
    smash_cmd = ["smash"] + heap_opts + broad.get_default_jvm_opts()
    smash_cmd = smash_cmd + ["--lhs_vcf", truth_vcf, "--rhs_vcf", eval_vcf]
    smash_cmd = smash_cmd + ["--reference_fasta", ref_file, "--presorted"]
    do.run(smash_cmd, "Compare files with SMaSH calldiff")
Example #5
0
def bcbio_variation_comparison(config_file, base_dir, data):
    """Compare variant calls with the bcbio.variation toolkit.

    Runs ``bcbio-variation variant-compare`` on config_file. A ``tmp``
    directory under base_dir is created via utils.safe_makedir and passed to
    broad.get_default_jvm_opts.
    """
    work_tmp = utils.safe_makedir(os.path.join(base_dir, "tmp"))
    tool_resources = config_utils.get_resources("bcbio_variation", data["config"])
    java_args = tool_resources.get("jvm_opts", ["-Xms750m", "-Xmx2g"]) + \
        broad.get_default_jvm_opts(work_tmp)
    compare_cmd = ["bcbio-variation"] + java_args + ["variant-compare", config_file]
    do.run(compare_cmd, "Comparing variant calls using bcbio.variation", data)
Example #6
0
def bcbio_variation_comparison(config_file, base_dir, data):
    """Run ``bcbio-variation variant-compare`` for the given configuration file.

    JVM options are taken from the "bcbio_variation" resources in
    data["config"] (default -Xms750m/-Xmx2g), followed by the defaults from
    broad.get_default_jvm_opts for a ``tmp`` directory inside base_dir.
    """
    scratch_dir = utils.safe_makedir(os.path.join(base_dir, "tmp"))
    heap_opts = config_utils.get_resources("bcbio_variation", data["config"]).get(
        "jvm_opts", ["-Xms750m", "-Xmx2g"])
    default_opts = broad.get_default_jvm_opts(scratch_dir)
    do.run(["bcbio-variation"] + heap_opts + default_opts + ["variant-compare", config_file],
           "Comparing variant calls using bcbio.variation",
           data)
Example #7
0
def _get_jvm_opts(data, out_file):
    """Retrieve JVM options when running the Java version of VarDict.

    Returns a shell prefix that exports VAR_DICT_OPTS, or an empty string
    when the VarDict command for data is not "vardict-java".
    """
    if get_vardict_command(data) != "vardict-java":
        return ""
    resources = config_utils.get_resources("vardict", data["config"])
    configured_opts = resources.get("jvm_opts", ["-Xms750m", "-Xmx4g"])
    # Build a new list: extending configured_opts in place would add the
    # defaults to the list held by the resources on every call.
    jvm_opts = list(configured_opts) + list(broad.get_default_jvm_opts(os.path.dirname(out_file)))
    return "export VAR_DICT_OPTS='%s' && " % " ".join(jvm_opts)
Example #8
0
def _get_jvm_opts(data, out_file):
    """Retrieve JVM options when running the Java version of VarDict.

    Returns a shell prefix that exports VAR_DICT_OPTS, or an empty string
    when the variant caller name for data does not end with "-java".
    """
    if not dd.get_variantcaller(data).endswith("-java"):
        return ""
    resources = config_utils.get_resources("vardict", data["config"])
    configured_opts = resources.get("jvm_opts", ["-Xms750m", "-Xmx4g"])
    # Build a new list: extending configured_opts in place would add the
    # defaults to the list held by the resources on every call.
    jvm_opts = list(configured_opts) + list(broad.get_default_jvm_opts(os.path.dirname(out_file)))
    return "export VAR_DICT_OPTS='%s' && " % " ".join(jvm_opts)
Example #9
0
def _get_jvm_opts(out_file, data):
    """Build the Java options list from the "purple" resources.

    Memory options are passed through config_utils.adjust_opts with an
    "increase" direction, the core count as magnitude and a 30000M maximum;
    the default JVM options for out_file's directory are appended.
    """
    purple_resources = config_utils.get_resources("purple", data["config"])
    base_opts = purple_resources.get("jvm_opts", ["-Xms750m", "-Xmx3500m"])
    memory_adjust = {"direction": "increase",
                     "maximum": "30000M",
                     "magnitude": dd.get_cores(data)}
    scaled_opts = config_utils.adjust_opts(base_opts,
                                           {"algorithm": {"memory_adjust": memory_adjust}})
    scaled_opts.extend(broad.get_default_jvm_opts(os.path.dirname(out_file)))
    return scaled_opts
Example #10
0
def _get_fgbio_jvm_opts(data, tmpdir, scale_factor=None):
    """Build the JVM options string for fgbio, ending with ``--tmp-dir``.

    data -- sample dictionary; cores and the "fgbio" resources are read from it.
    tmpdir -- directory passed to the trailing ``--tmp-dir`` option.
    scale_factor -- when set and smaller than the core count, memory options
                    are increased by ``cores // scale_factor``.

    Returns the options as a single space-separated string.
    """
    cores, _ = _get_cores_memory(data)
    resources = config_utils.get_resources("fgbio", data["config"])
    jvm_opts = resources.get("jvm_opts", ["-Xms750m", "-Xmx4g"])
    if scale_factor and cores > scale_factor:
        jvm_opts = config_utils.adjust_opts(jvm_opts, {"algorithm": {"memory_adjust":
                                                                     {"direction": "increase",
                                                                      "magnitude": cores // scale_factor}}})
    # Build a new list: without scaling, jvm_opts is the list held by the
    # resources and extending it in place would grow it on every call.
    all_opts = list(jvm_opts) + list(broad.get_default_jvm_opts())
    return " ".join(all_opts) + " --tmp-dir %s" % tmpdir
Example #11
0
def _get_fgbio_jvm_opts(data, tmpdir, scale_factor=None):
    """Build the JVM options string for fgbio, ending with ``--tmp-dir``.

    data -- sample dictionary; cores and the "fgbio" resources are read from it.
    tmpdir -- directory passed to the trailing ``--tmp-dir`` option.
    scale_factor -- when set and smaller than the core count, memory options
                    are increased by ``cores // scale_factor``.

    Returns the options as a single space-separated string.
    """
    cores, _ = _get_cores_memory(data)
    resources = config_utils.get_resources("fgbio", data["config"])
    jvm_opts = resources.get("jvm_opts", ["-Xms750m", "-Xmx4g"])
    if scale_factor and cores > scale_factor:
        jvm_opts = config_utils.adjust_opts(jvm_opts, {"algorithm": {"memory_adjust":
                                                                     {"direction": "increase",
                                                                      "magnitude": cores // scale_factor}}})
    # Build a new list: without scaling, jvm_opts is the list held by the
    # resources and extending it in place would grow it on every call.
    all_opts = list(jvm_opts) + list(broad.get_default_jvm_opts())
    return " ".join(all_opts) + " --tmp-dir %s" % tmpdir
Example #12
0
def _get_varscan_opts(config, tmp_dir):
    """Retrieve common options for running VarScan.

    Handles jvm_opts, setting user and country to English to avoid issues
    with different locales producing non-compliant VCF.

    config -- configuration holding the "varscan" resources.
    tmp_dir -- directory passed to broad.get_default_jvm_opts.

    Returns the options as a single space-separated string.
    """
    resources = config_utils.get_resources("varscan", config)
    # Default to an initial heap (-Xms) and a maximum heap (-Xmx); passing
    # -Xmx twice would only restate the maximum.
    jvm_opts = resources.get("jvm_opts", ["-Xms750m", "-Xmx2g"])
    adjusted_opts = config_utils.adjust_opts(jvm_opts,
                                             {"algorithm": {"memory_adjust":
                                                            {"magnitude": 1.1, "direction": "decrease"}}})
    all_opts = list(adjusted_opts)
    all_opts += ["-Duser.language=en", "-Duser.country=US"]
    all_opts += broad.get_default_jvm_opts(tmp_dir)
    return " ".join(all_opts)
Example #13
0
def _get_jvm_opts(out_file, data):
    """Return Java options for the "purple" resources with adjusted memory.

    The configured (or default -Xms750m/-Xmx3500m) options go through
    config_utils.adjust_opts, increasing by the core count up to 30000M, and
    the default JVM options for the directory of out_file are added at the end.
    """
    configured = config_utils.get_resources("purple", data["config"]).get(
        "jvm_opts", ["-Xms750m", "-Xmx3500m"])
    adjust_config = {"algorithm": {"memory_adjust": {"direction": "increase",
                                                     "maximum": "30000M",
                                                     "magnitude": dd.get_cores(data)}}}
    java_opts = config_utils.adjust_opts(configured, adjust_config)
    out_dir = os.path.dirname(out_file)
    java_opts += broad.get_default_jvm_opts(out_dir)
    return java_opts