Esempio n. 1
0
def _create_subset_file(in_file, het_region_bed, work_dir, data):
    """Write a BCF subset of ``in_file`` restricted to pre-calculated regions.

    The regions are the intersection of ``het_region_bed`` with the base CNV
    regions reported by ``shared.get_base_cnv_regions``. The output is named
    ``<input base name>-origsubset.bcf`` inside ``work_dir`` and is only
    regenerated when it is not up to date relative to ``in_file``.

    Returns the path to the subset BCF file.
    """
    base_cnv_bed = shared.get_base_cnv_regions(data, work_dir)
    subset_bed = bedutils.intersect_two(het_region_bed, base_cnv_bed, work_dir, data)
    in_base = utils.splitext_plus(os.path.basename(in_file))[0]
    out_file = os.path.join(work_dir, "%s-origsubset.bcf" % in_base)
    # Nothing to do when a current output already exists.
    if utils.file_uptodate(out_file, in_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        # Only restrict by region when the intersected BED file is usable;
        # otherwise the whole input is converted.
        if utils.file_exists(subset_bed):
            regions = "-R %s" % subset_bed
        else:
            regions = ""
        cmd = "bcftools view {regions} -o {tx_out_file} -O b {in_file}"
        do.run(cmd.format(regions=regions, tx_out_file=tx_out_file, in_file=in_file),
               "Extract regions for BubbleTree frequency determination")
    return out_file
Esempio n. 2
0
def _create_subset_file(in_file, het_region_bed, work_dir, data):
    """Write a BCF subset of ``in_file`` restricted to pre-calculated regions.

    The regions are the intersection of ``het_region_bed`` with the base CNV
    regions reported by ``shared.get_base_cnv_regions``. The output is named
    ``<input base name>-origsubset.bcf`` inside ``work_dir`` and is only
    regenerated when it is not up to date relative to ``in_file``.

    Returns the path to the subset BCF file.
    """
    base_cnv_bed = shared.get_base_cnv_regions(data, work_dir)
    subset_bed = bedutils.intersect_two(het_region_bed, base_cnv_bed, work_dir, data)
    in_base = utils.splitext_plus(os.path.basename(in_file))[0]
    out_file = os.path.join(work_dir, "%s-origsubset.bcf" % in_base)
    # Nothing to do when a current output already exists.
    if utils.file_uptodate(out_file, in_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        # Only restrict by region when the intersected BED file is usable;
        # otherwise the whole input is converted.
        if utils.file_exists(subset_bed):
            regions = "-R %s" % subset_bed
        else:
            regions = ""
        cmd = "bcftools view {regions} -o {tx_out_file} -O b {in_file}"
        do.run(cmd.format(regions=regions, tx_out_file=tx_out_file, in_file=in_file),
               "Extract regions for BubbleTree frequency determination")
    return out_file
Esempio n. 3
0
def heterogzygote_counts(paired):
    """Collect tumor/normal allelic counts at population heterozygote sites.

    The heterozygote BED file is looked up under
    ``genome_resources -> variation -> germline_het_pon`` in the tumor sample,
    intersected with the population variant regions of the available samples,
    and passed to ``_run_collect_allelic_counts`` for each sample.

    Returns a ``(tumor_counts, normal_counts)`` tuple; ``normal_counts`` is
    ``None`` when the pair has no normal sample.
    """
    tumor = paired.tumor_data
    work_dir = utils.safe_makedir(
        os.path.join(dd.get_work_dir(tumor), "structural", "counts"))
    key = "germline_het_pon"
    het_bed = tz.get_in(["genome_resources", "variation", key], tumor)
    normal = paired.normal_data
    # Tumor-only runs carry a falsy normal sample; drop it from the inputs.
    samples = [sample for sample in (tumor, normal) if sample]
    variant_regions = bedutils.population_variant_regions(samples)
    cur_het_bed = bedutils.intersect_two(het_bed, variant_regions, work_dir, tumor)
    tumor_counts = _run_collect_allelic_counts(cur_het_bed, key, work_dir, tumor)
    normal_counts = None
    if normal:
        normal_counts = _run_collect_allelic_counts(cur_het_bed, key, work_dir, normal)
    return tumor_counts, normal_counts
Esempio n. 4
0
def heterogzygote_counts(paired):
    """Collect tumor/normal allelic counts at population heterozygote sites.

    The heterozygote BED file is looked up under
    ``genome_resources -> variation -> germline_het_pon`` in the tumor sample,
    intersected with the population variant regions of the available samples,
    and passed to ``_run_collect_allelic_counts`` for each sample. When normal
    counts are produced, both count outputs are passed through
    ``_filter_by_normal`` before being returned.

    Returns a ``(tumor_counts, normal_counts)`` tuple; ``normal_counts`` is
    ``None`` when the pair has no normal sample.
    """
    tumor = paired.tumor_data
    work_dir = utils.safe_makedir(
        os.path.join(dd.get_work_dir(tumor), "structural", "counts"))
    key = "germline_het_pon"
    het_bed = tz.get_in(["genome_resources", "variation", key], tumor)
    normal = paired.normal_data
    # Tumor-only runs carry a falsy normal sample; drop it from the inputs.
    samples = [sample for sample in (tumor, normal) if sample]
    variant_regions = bedutils.population_variant_regions(samples)
    cur_het_bed = bedutils.intersect_two(het_bed, variant_regions, work_dir, tumor)
    tumor_counts = _run_collect_allelic_counts(cur_het_bed, key, work_dir, tumor)
    normal_counts = None
    if normal:
        normal_counts = _run_collect_allelic_counts(cur_het_bed, key, work_dir, normal)
    # Without normal counts there is nothing to filter against.
    if not normal_counts:
        return tumor_counts, normal_counts
    filtered_tumor, filtered_normal = _filter_by_normal(tumor_counts, normal_counts, tumor)
    return filtered_tumor, filtered_normal