Example #1
0
def _add_variantcalls_to_output(out, data):
    """Run CNVkit ploidy calling, then export the calls as BED and VCF.

    The call file is named after out["cns"] with "-call" inserted before
    the extension; the exports share its base name with ".bed" / ".vcf"
    extensions. Each step is skipped when its output already exists.

    Stores "call_file", "vrn_bed" and "vrn_file" in `out` and returns it.
    """
    def _cnvkit(subcommand):
        # cnvkit.py is looked up next to the running Python interpreter
        return [os.path.join(os.path.dirname(sys.executable), "cnvkit.py"), subcommand]

    def _male_reference_args():
        if gender and gender.lower() == "male":
            return ["--male-reference"]
        return []

    call_file = "%s-call%s" % os.path.splitext(out["cns"])
    gender = population.get_gender(data)
    if not utils.file_exists(call_file):
        with file_transaction(data, call_file) as tx_call_file:
            call_cmd = _cnvkit("call")
            call_cmd.extend(["--ploidy", str(dd.get_ploidy(data)),
                             "-o", tx_call_file, out["cns"]])
            if gender and gender.lower() != "unknown":
                call_cmd.extend(["--gender", gender])
                call_cmd.extend(_male_reference_args())
            do.run(call_cmd, "CNVkit call ploidy")

    call_base = os.path.splitext(call_file)[0]
    calls = {}
    for outformat in ("bed", "vcf"):
        exported = "%s.%s" % (call_base, outformat)
        calls[outformat] = exported
        if utils.file_exists(exported):
            continue
        with file_transaction(data, exported) as tx_out_file:
            export_cmd = _cnvkit("export")
            export_cmd.extend([outformat, "--sample-id", dd.get_sample_name(data),
                               "--ploidy", str(dd.get_ploidy(data)),
                               "-o", tx_out_file, call_file])
            export_cmd.extend(_male_reference_args())
            do.run(export_cmd, "CNVkit export %s" % outformat)

    out["call_file"] = call_file
    out["vrn_bed"] = annotate.add_genes(calls["bed"], data)
    effects_vcf, _ = effects.add_to_vcf(calls["vcf"], data, "snpeff")
    # fall back to the raw VCF export when no annotated VCF is returned
    out["vrn_file"] = effects_vcf or calls["vcf"]
    return out
Example #2
0
def _get_batch_gender(items):
    """Return the gender shared by all items in a batch, if there is one.

    Returns None when the items do not all report the same gender or when
    the shared value is "unknown". Better not to specify for mixed
    populations, CNVkit will work it out
    https://github.com/bcbio/bcbio-nextgen/commit/1a0e217c8a4d3cee10fa890fb3cfd4db5034281d#r26279752
    """
    distinct = {population.get_gender(item) for item in items}
    if len(distinct) != 1:
        return None
    (shared,) = distinct
    return shared if shared != "unknown" else None
Example #3
0
def _get_batch_gender(items):
    """Look up a single consistent gender for a batch of items.

    The result is None unless every item reports the same gender and that
    gender is not "unknown". Better not to specify for mixed populations,
    CNVkit will work it out
    https://github.com/bcbio/bcbio-nextgen/commit/1a0e217c8a4d3cee10fa890fb3cfd4db5034281d#r26279752
    """
    seen = set()
    for item in items:
        seen.add(population.get_gender(item))
    if len(seen) == 1:
        only = next(iter(seen))
        if only != "unknown":
            return only
    return None
Example #4
0
def _add_diagram_plot(out, data):
    """Create the CNVkit diagram PDF from the sample's cnr and cns files.

    Both inputs first go through _remove_haplotype_chroms. Returns None
    when _cnx_is_empty flags either result, otherwise the path of the
    "-diagram.pdf" file named after out["cnr"]; an existing PDF is reused.
    """
    diagram_pdf = "%s-diagram.pdf" % os.path.splitext(out["cnr"])[0]
    cnr, cns = [_remove_haplotype_chroms(out[key], data) for key in ("cnr", "cns")]
    if any(_cnx_is_empty(cnx) for cnx in (cnr, cns)):
        return None
    if utils.file_exists(diagram_pdf):
        return diagram_pdf
    with file_transaction(data, diagram_pdf) as tx_out_file:
        cmd = [_get_cmd(), "diagram", "-s", cns, "-o", tx_out_file, cnr]
        gender = population.get_gender(data)
        if gender and gender.lower() == "male":
            cmd.append("--male-reference")
        do.run(_prep_cmd(cmd, tx_out_file), "CNVkit diagram plot")
    return diagram_pdf
Example #5
0
def _do_run(paired):
    """Run Battenberg CNV calling on a tumor/normal pair.

    Output goes straight into the work directory rather than a temporary
    one, since Battenberg does smart restarts. The command only runs when
    some expected output files are missing.

    Returns the Battenberg output dictionary with "plot" and "ignore"
    entries added.
    """
    tumor_data = paired.tumor_data
    work_dir = _sv_workdir(tumor_data)
    out = _get_battenberg_out(paired, work_dir)
    ignore_file = os.path.join(work_dir, "ignore_chromosomes.txt")
    if len(_missing_files(out)) > 0:
        ref_file = dd.get_ref_file(tumor_data)
        # Battenberg resources are looked up in a sibling directory of the
        # one holding the reference file
        bat_datadir = os.path.normpath(
            os.path.join(os.path.dirname(ref_file), os.pardir, "battenberg"))
        impute_info = os.path.join(bat_datadir, "impute", "impute_info.txt")
        ignore_file, gl_file = _make_ignore_file(work_dir, ref_file, ignore_file, impute_info)
        tooldir = install.get_defaults().get("tooldir", "/usr/local")
        local_sitelib = os.path.join(tooldir, "lib", "R", "site-library")
        tumor_bam = paired.tumor_bam
        normal_bam = paired.normal_bam
        platform = dd.get_platform(tumor_data)
        genome_build = tumor_data["genome_build"]
        # scale cores to avoid over-using memory during imputation
        cores = max(1, int(dd.get_num_cores(tumor_data) * 0.5))
        battenberg_codes = {"male": "XY", "female": "XX", "unknown": "L"}
        gender = battenberg_codes.get(population.get_gender(tumor_data))
        gender_str = "-ge %s" % (gender)
        if gender == "L":
            # the "L" code is passed together with the gl file from _make_ignore_file
            gender_str += " -gl %s" % gl_file
        rscript_dir = os.path.dirname(utils.Rscript_cmd())
        r_export_cmd = "unset R_HOME && export PATH=%s:$PATH && " % rscript_dir
        template = (
            "export R_LIBS_USER={local_sitelib} && {r_export_cmd}"
            "battenberg.pl -t {cores} -o {work_dir} -r {ref_file}.fai "
            "-tb {tumor_bam} -nb {normal_bam} -e {bat_datadir}/impute/impute_info.txt "
            "-u {bat_datadir}/1000genomesloci -c {bat_datadir}/probloci.txt "
            "-ig {ignore_file} {gender_str} "
            "-assembly {genome_build} -species Human -platform {platform}")
        cmd = template.format(
            local_sitelib=local_sitelib, r_export_cmd=r_export_cmd, cores=cores,
            work_dir=work_dir, ref_file=ref_file, tumor_bam=tumor_bam,
            normal_bam=normal_bam, bat_datadir=bat_datadir,
            ignore_file=ignore_file, gender_str=gender_str,
            genome_build=genome_build, platform=platform)
        do.run(cmd, "Battenberg CNV calling")
    assert len(_missing_files(out)) == 0, "Missing Battenberg output: %s" % _missing_files(out)
    out["plot"] = _get_battenberg_out_plots(paired, work_dir)
    out["ignore"] = ignore_file
    return out
Example #6
0
def _add_diagram_plot(out, data):
    """Produce a CNVkit diagram PDF for the cnr/cns pair in `out`.

    Inputs are passed through _remove_haplotype_chroms first; when
    _cnx_is_empty flags either of them no plot is made and None is
    returned. Otherwise returns the PDF path, generating the file only if
    it does not already exist.
    """
    out_file = "%s-diagram.pdf" % os.path.splitext(out["cnr"])[0]
    cnr = _remove_haplotype_chroms(out["cnr"], data)
    cns = _remove_haplotype_chroms(out["cns"], data)
    have_data = not _cnx_is_empty(cnr) and not _cnx_is_empty(cns)
    if not have_data:
        return None
    if not utils.file_exists(out_file):
        with file_transaction(data, out_file) as tx_pdf:
            diagram_cmd = [_get_cmd(), "diagram", "-s", cns,
                           "-o", tx_pdf, cnr]
            gender = population.get_gender(data)
            is_male = gender and gender.lower() == "male"
            male_args = ["--male-reference"] if is_male else []
            do.run(diagram_cmd + male_args, "CNVkit diagram plot")
    return out_file
Example #7
0
def _add_variantcalls_to_output(out, data, items, is_somatic=False):
    """Call ploidy and convert into VCF and BED representations.

    Args:
        out: dictionary of CNVkit outputs; out["cns"] is the input to call.
        data: sample data, passed to the gender, ploidy, sample name,
            transaction and annotation helpers.
        items: batch items handed to _compatible_small_variants to find a
            small variant VCF usable by the call step.
        is_somatic: when False and a small variant VCF is supplied, the
            call step adds "-m clonal".

    Returns:
        `out` with "call_file", "vrn_bed" and "vrn_file" added.
    """
    call_file = "%s-call%s" % os.path.splitext(out["cns"])
    # Resolve gender up front: the export loop below needs it even when the
    # call file already exists and the call step is skipped. Assigning it
    # only inside that branch raised UnboundLocalError on restarts.
    gender = population.get_gender(data)
    if not utils.file_exists(call_file):
        with file_transaction(data, call_file) as tx_call_file:
            filters = ["--filter", "cn"]
            cmd = [os.path.join(os.path.dirname(sys.executable), "cnvkit.py"), "call"] + \
                  filters + \
                  ["--ploidy", str(ploidy.get_ploidy([data])),
                   "-o", tx_call_file, out["cns"]]
            small_vrn_files = _compatible_small_variants(data, items)
            if len(small_vrn_files) > 0 and _cna_has_values(out["cns"]):
                cmd += [
                    "--vcf", small_vrn_files[0].name, "--sample-id",
                    small_vrn_files[0].sample
                ]
                if small_vrn_files[0].normal:
                    cmd += ["--normal-id", small_vrn_files[0].normal]
                if not is_somatic:
                    cmd += ["-m", "clonal"]
            if gender and gender.lower() != "unknown":
                cmd += ["--gender", gender]
                if gender.lower() == "male":
                    cmd += ["--male-reference"]
            do.run(cmd, "CNVkit call ploidy")
    calls = {}
    for outformat in ["bed", "vcf"]:
        out_file = "%s.%s" % (os.path.splitext(call_file)[0], outformat)
        calls[outformat] = out_file
        # NOTE(review): this uses os.path.exists while the call step above
        # uses utils.file_exists -- confirm whether the difference is intended.
        if not os.path.exists(out_file):
            with file_transaction(data, out_file) as tx_out_file:
                cmd = [
                    os.path.join(os.path.dirname(sys.executable), "cnvkit.py"),
                    "export", outformat, "--sample-id",
                    dd.get_sample_name(data), "--ploidy",
                    str(ploidy.get_ploidy([data])), "-o", tx_out_file,
                    call_file
                ]
                if gender and gender.lower() == "male":
                    cmd += ["--male-reference"]
                do.run(cmd, "CNVkit export %s" % outformat)
    out["call_file"] = call_file
    out["vrn_bed"] = annotate.add_genes(calls["bed"], data)
    effects_vcf, _ = effects.add_to_vcf(calls["vcf"], data, "snpeff")
    # fall back to the raw VCF export when no annotated VCF is returned
    out["vrn_file"] = effects_vcf or calls["vcf"]
    return out
Example #8
0
def _cnvkit_background(background_cnns, out_file, target_bed, antitarget_bed, data):
    """Build the CNVkit background reference file.

    With no background CNNs the reference is built from the target and
    antitarget BEDs instead (the flat case with no normal sample).
    An existing out_file is reused. Returns out_file.
    """
    if utils.file_exists(out_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        cmd = [_get_cmd(), "reference", "-f", dd.get_ref_file(data), "-o", tx_out_file]
        gender = population.get_gender(data)
        gender_known = gender and gender.lower() != "unknown"
        if gender_known:
            cmd.extend(["--gender", gender])
            if gender.lower() == "male":
                cmd.append("--male-reference")
        if len(background_cnns) == 0:
            cmd.extend(["-t", target_bed, "-a", antitarget_bed])
        else:
            cmd.extend(background_cnns)
        do.run(_prep_cmd(cmd, tx_out_file), "CNVkit background")
    return out_file
Example #9
0
def cnvkit_background(background_cnns, out_file, items, target_bed=None, antitarget_bed=None):
    """Build the CNVkit background reference for a batch of items.

    Gender options are only passed when, ignoring "unknown", the items
    report exactly one gender. With no background CNNs the flat case is
    used, which requires both target_bed and antitarget_bed.
    An existing out_file is reused. Returns out_file.
    """
    if utils.file_exists(out_file):
        return out_file
    first = items[0]
    with file_transaction(first, out_file) as tx_out_file:
        cmd = [_get_cmd(), "reference", "-f", dd.get_ref_file(first), "-o", tx_out_file]
        known_genders = {population.get_gender(item) for item in items} - {"unknown"}
        if len(known_genders) == 1:
            (gender,) = known_genders
            cmd.extend(["--gender", gender])
            if gender.lower() == "male":
                cmd.append("--male-reference")
        if len(background_cnns) == 0:
            assert target_bed and antitarget_bed, "Missing CNNs and target BEDs for flat background"
            cmd.extend(["-t", target_bed, "-a", antitarget_bed])
        else:
            cmd.extend(background_cnns)
        do.run(_prep_cmd(cmd, tx_out_file), "CNVkit background")
    return out_file
Example #10
0
def _do_run(paired):
    """Perform Battenberg calling with the paired dataset.

    This purposely does not use a temporary directory for the output
    since Battenberg does smart restarts. Battenberg is only invoked when
    _missing_files reports missing outputs; afterwards all outputs must
    be present.

    Returns the output dictionary extended with "plot" and "ignore".
    """
    work_dir = _sv_workdir(paired.tumor_data)
    out = _get_battenberg_out(paired, work_dir)
    ignore_file = os.path.join(work_dir, "ignore_chromosomes.txt")
    if len(_missing_files(out)) > 0:
        fields = {"work_dir": work_dir}
        ref_file = dd.get_ref_file(paired.tumor_data)
        fields["ref_file"] = ref_file
        ref_dir = os.path.dirname(ref_file)
        bat_datadir = os.path.normpath(os.path.join(ref_dir, os.pardir, "battenberg"))
        fields["bat_datadir"] = bat_datadir
        ignore_file, gl_file = _make_ignore_file(
            work_dir, ref_file, ignore_file,
            os.path.join(bat_datadir, "impute", "impute_info.txt"))
        fields["ignore_file"] = ignore_file
        fields["local_sitelib"] = os.path.join(
            install.get_defaults().get("tooldir", "/usr/local"),
            "lib", "R", "site-library")
        fields["tumor_bam"] = paired.tumor_bam
        fields["normal_bam"] = paired.normal_bam
        fields["platform"] = dd.get_platform(paired.tumor_data)
        fields["genome_build"] = paired.tumor_data["genome_build"]
        # scale cores to avoid over-using memory during imputation
        fields["cores"] = max(1, int(dd.get_num_cores(paired.tumor_data) * 0.5))
        gender_codes = {"male": "XY", "female": "XX", "unknown": "L"}
        gender = gender_codes.get(population.get_gender(paired.tumor_data))
        if gender != "L":
            fields["gender_str"] = "-ge %s" % (gender)
        else:
            # the "L" code is passed together with the gl file from _make_ignore_file
            fields["gender_str"] = "-ge %s -gl %s" % (gender, gl_file)
        fields["r_export_cmd"] = ("unset R_HOME && export PATH=%s:$PATH && "
                                  % os.path.dirname(utils.Rscript_cmd()))
        cmd = ("export R_LIBS_USER={local_sitelib} && {r_export_cmd}"
               "battenberg.pl -t {cores} -o {work_dir} -r {ref_file}.fai "
               "-tb {tumor_bam} -nb {normal_bam} -e {bat_datadir}/impute/impute_info.txt "
               "-u {bat_datadir}/1000genomesloci -c {bat_datadir}/probloci.txt "
               "-ig {ignore_file} {gender_str} "
               "-assembly {genome_build} -species Human -platform {platform}")
        do.run(cmd.format(**fields), "Battenberg CNV calling")
    assert len(_missing_files(out)) == 0, "Missing Battenberg output: %s" % _missing_files(out)
    out["plot"] = _get_battenberg_out_plots(paired, work_dir)
    out["ignore"] = ignore_file
    return out