Esempio n. 1
0
def get_analysis_intervals(data, vrn_file, base_dir):
    """Pick the analysis regions for the current variant calling pipeline.

    Candidate sources are tried in order; the first truthy one is returned:

    1. The result of ``_callable_from_gvcf``, attempted only when ``vrn_file``
       is set and "gvcf" is among ``dd.get_tools_on(data)``.
    2. ``data["ensemble_bed"]``.
    3. ``dd.get_callable_regions(data)``.
    4. The first element of ``callable.sample_callable_bed`` run on the first
       of ``align_bam``, ``work_bam`` or ``work_bam_callable`` found in data.
    5. ``callable_regions``, then ``variant_regions``, under
       config -> algorithm.

    Returns None when none of the sources is available.
    """
    if vrn_file and "gvcf" in dd.get_tools_on(data):
        gvcf_bed = _callable_from_gvcf(data, vrn_file, base_dir)
        if gvcf_bed:
            return gvcf_bed

    ensemble_bed = data.get("ensemble_bed")
    if ensemble_bed:
        return ensemble_bed

    precomputed = dd.get_callable_regions(data)
    if precomputed:
        return precomputed

    # BAM inputs, in order of preference.
    for bam_key in ("align_bam", "work_bam", "work_bam_callable"):
        bam_file = data.get(bam_key)
        if bam_file:
            bed_info = callable.sample_callable_bed(bam_file,
                                                    dd.get_ref_file(data), data)
            return bed_info[0]

    # Last resort: regions named directly in the algorithm configuration.
    for region_key in ("callable_regions", "variant_regions"):
        configured = tz.get_in(["config", "algorithm", region_key], data)
        if configured:
            return configured
    return None
Esempio n. 2
0
def get_analysis_intervals(data):
    """Pick the analysis regions for the current variant calling pipeline.

    Candidate sources are tried in order; the first truthy one is returned:

    1. ``data["ensemble_bed"]``.
    2. ``dd.get_callable_regions(data)``.
    3. The result of ``callable.sample_callable_bed`` run on the first of
       ``align_bam``, ``work_bam`` or ``work_bam_callable`` found in data.
    4. ``callable_regions``, then ``variant_regions``, under
       config -> algorithm.

    Returns None when none of the sources is available.
    """
    ensemble_bed = data.get("ensemble_bed")
    if ensemble_bed:
        return ensemble_bed

    precomputed = dd.get_callable_regions(data)
    if precomputed:
        return precomputed

    # BAM inputs, in order of preference.
    for bam_key in ("align_bam", "work_bam", "work_bam_callable"):
        bam_file = data.get(bam_key)
        if bam_file:
            return callable.sample_callable_bed(bam_file, dd.get_ref_file(data), data)

    # Last resort: regions named directly in the algorithm configuration.
    for region_key in ("callable_regions", "variant_regions"):
        configured = tz.get_in(["config", "algorithm", region_key], data)
        if configured:
            return configured
    return None
Esempio n. 3
0
def get_analysis_intervals(data):
    """Retrieve analysis regions for the current variant calling pipeline.

    Returns the first truthy value found, checking in this order: the
    ``ensemble_bed`` entry of data, ``dd.get_callable_regions(data)``, the
    result of ``callable.sample_callable_bed`` for an available BAM
    (``align_bam``, ``work_bam``, ``work_bam_callable``), and finally the
    ``callable_regions`` / ``variant_regions`` settings under
    config -> algorithm. Falls through to None if nothing matches.
    """
    bam_keys = ("align_bam", "work_bam", "work_bam_callable")
    config_paths = (["config", "algorithm", "callable_regions"],
                    ["config", "algorithm", "variant_regions"])

    bed = data.get("ensemble_bed")
    if bed:
        return bed

    bed = dd.get_callable_regions(data)
    if bed:
        return bed

    for key in bam_keys:
        in_bam = data.get(key)
        if in_bam:
            ref_file = dd.get_ref_file(data)
            return callable.sample_callable_bed(in_bam, ref_file, data)

    for path in config_paths:
        bed = tz.get_in(path, data)
        if bed:
            return bed
    return None
Esempio n. 4
0
def _evaluate_vcf(calls, truth_vcf, work_dir, data):
    """Write per-caller validation statistics to a CSV file in work_dir.

    The output is named ``<sample name>-sv-validate.csv``. If that file
    already exists it is returned untouched. Otherwise a header row is
    written, followed by every row produced by ``_validate_caller_vcf`` for
    each entry in calls (using its ``vrn_file`` and ``variantcaller`` keys).

    Returns the path to the CSV file.
    """
    csv_name = "%s-sv-validate.csv" % dd.get_sample_name(data)
    out_file = os.path.join(work_dir, csv_name)
    if utils.file_exists(out_file):
        return out_file

    # Write inside a file transaction, using the temporary path it supplies.
    with file_transaction(data, out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(["sample", "caller", "vtype", "metric", "value"])
            for call in calls:
                writer.writerows(
                    _validate_caller_vcf(call["vrn_file"], truth_vcf,
                                         dd.get_callable_regions(data),
                                         call["variantcaller"], data))
    return out_file
Esempio n. 5
0
def get_analysis_intervals(data, vrn_file, base_dir):
    """Pick the analysis regions for the current variant calling pipeline.

    Candidate sources are tried in order; the first truthy one is returned:

    1. The result of ``_callable_from_gvcf``, attempted only when ``vrn_file``
       is set and "gvcf" is among ``dd.get_tools_on(data)``.
    2. ``data["ensemble_bed"]``.
    3. ``dd.get_callable_regions(data)``.
    4. The result of ``callable.sample_callable_bed`` run on the first of
       ``align_bam``, ``work_bam`` or ``work_bam_callable`` found in data.
    5. ``callable_regions``, then ``variant_regions``, under
       config -> algorithm.

    Returns None when none of the sources is available.
    """
    if vrn_file and "gvcf" in dd.get_tools_on(data):
        gvcf_bed = _callable_from_gvcf(data, vrn_file, base_dir)
        if gvcf_bed:
            return gvcf_bed

    ensemble_bed = data.get("ensemble_bed")
    if ensemble_bed:
        return ensemble_bed

    precomputed = dd.get_callable_regions(data)
    if precomputed:
        return precomputed

    # BAM inputs, in order of preference.
    for bam_key in ("align_bam", "work_bam", "work_bam_callable"):
        bam_file = data.get(bam_key)
        if bam_file:
            return callable.sample_callable_bed(bam_file, dd.get_ref_file(data), data)

    # Last resort: regions named directly in the algorithm configuration.
    for region_key in ("callable_regions", "variant_regions"):
        configured = tz.get_in(["config", "algorithm", region_key], data)
        if configured:
            return configured
    return None
Esempio n. 6
0
def _evaluate_vcf(calls, truth_vcf, work_dir, data):
    """Produce a CSV of validation statistics for each caller's VCF.

    The CSV lives in work_dir and is named after the sample
    (``<sample name>-sv-validate.csv``). An existing file is reused as-is;
    otherwise the header plus all rows yielded by ``_validate_caller_vcf``
    for every call are written.

    Returns the path to the CSV file.
    """
    header = ["sample", "caller", "vtype", "metric", "value"]
    out_file = os.path.join(work_dir,
                            "%s-sv-validate.csv" % dd.get_sample_name(data))
    if utils.file_exists(out_file):
        return out_file

    # Write inside a file transaction, using the temporary path it supplies.
    with file_transaction(data, out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            for cur_call in calls:
                caller_stats = _validate_caller_vcf(
                    cur_call["vrn_file"], truth_vcf,
                    dd.get_callable_regions(data),
                    cur_call["variantcaller"], data)
                for row in caller_stats:
                    writer.writerow(row)
    return out_file
Esempio n. 7
0
def _run_purecn_dx(out, paired):
    """Extract copy number and mutational metrics from a PureCN rds file.

    Runs ``PureCN_Dx.R`` with the callable regions of the tumor sample unless
    ``out["mutation_burden"]`` is already up to date relative to
    ``out["rds"]``. After the run, each expected output file that exists in
    the transactional directory is moved next to ``out_base``.

    Returns the ``out`` dictionary as supplied by ``_get_purecn_dx_files``.
    """
    out_base, out, all_files = _get_purecn_dx_files(paired, out)
    if utils.file_uptodate(out["mutation_burden"], out["rds"]):
        return out

    with file_transaction(paired.tumor_data, out_base) as tx_out_base:
        cmd = ["PureCN_Dx.R",
               "--rds", out["rds"],
               "--callable", dd.get_callable_regions(paired.tumor_data),
               "--signatures",
               "--out", tx_out_base]
        do.run(cmd, "PureCN Dx mutational burden and signatures")

        # Only move the expected outputs that the run actually produced.
        tx_dir = os.path.dirname(tx_out_base)
        final_dir = os.path.dirname(out_base)
        for fname in all_files:
            produced = os.path.join(tx_dir, fname)
            if os.path.exists(produced):
                shutil.move(produced, os.path.join(final_dir, fname))
    return out