def _create_subset_file(in_file, het_region_bed, work_dir, data):
    """Write a BCF copy of ``in_file`` limited to pre-calculated regions.

    The region BED is whatever ``bedutils.intersect_two`` produces from
    ``het_region_bed`` and the base CNV regions for ``data``. When that BED
    does not exist, no ``-R`` option is passed to ``bcftools view``.

    Returns the path ``<work_dir>/<input basename>-origsubset.bcf``. The
    bcftools command is skipped when ``utils.file_uptodate`` reports the
    output as current relative to ``in_file``.
    """
    base_cnv_bed = shared.get_base_cnv_regions(data, work_dir)
    subset_bed = bedutils.intersect_two(het_region_bed, base_cnv_bed, work_dir, data)
    in_base = utils.splitext_plus(os.path.basename(in_file))[0]
    out_file = os.path.join(work_dir, "%s-origsubset.bcf" % in_base)
    # Nothing to do when a current output is already present.
    if utils.file_uptodate(out_file, in_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        if utils.file_exists(subset_bed):
            region_arg = "-R %s" % subset_bed
        else:
            region_arg = ""
        cmd = "bcftools view {regions} -o {tx_out_file} -O b {in_file}".format(
            regions=region_arg, tx_out_file=tx_out_file, in_file=in_file)
        do.run(cmd, "Extract regions for BubbleTree frequency determination")
    return out_file
def _create_subset_file(in_file, het_region_bed, work_dir, data):
    """Write a BCF copy of ``in_file`` limited to pre-calculated regions.

    The region BED is whatever ``bedutils.intersect_two`` produces from
    ``het_region_bed`` and the base CNV regions for ``data``. When that BED
    does not exist, no ``-R`` option is passed to ``bcftools view``.

    Returns the path ``<work_dir>/<input basename>-origsubset.bcf``. The
    bcftools command is skipped when ``utils.file_uptodate`` reports the
    output as current relative to ``in_file``.
    """
    base_cnv_bed = shared.get_base_cnv_regions(data, work_dir)
    subset_bed = bedutils.intersect_two(het_region_bed, base_cnv_bed, work_dir, data)
    in_base = utils.splitext_plus(os.path.basename(in_file))[0]
    out_file = os.path.join(work_dir, "%s-origsubset.bcf" % in_base)
    # Nothing to do when a current output is already present.
    if utils.file_uptodate(out_file, in_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        if utils.file_exists(subset_bed):
            region_arg = "-R %s" % subset_bed
        else:
            region_arg = ""
        cmd = "bcftools view {regions} -o {tx_out_file} -O b {in_file}".format(
            regions=region_arg, tx_out_file=tx_out_file, in_file=in_file)
        do.run(cmd, "Extract regions for BubbleTree frequency determination")
    return out_file
def heterogzygote_counts(paired):
    """Provide tumor/normal counts at population heterozygote sites.

    Counts are produced by ``_run_collect_allelic_counts`` (CollectAllelicCounts)
    over the ``germline_het_pon`` genome resource, after passing it through
    ``bedutils.intersect_two`` with the population variant regions of the
    available samples.

    Returns a ``(tumor_counts, normal_counts)`` tuple; ``normal_counts`` is
    ``None`` when ``paired.normal_data`` is falsy.
    """
    tumor = paired.tumor_data
    counts_dir = os.path.join(dd.get_work_dir(tumor), "structural", "counts")
    work_dir = utils.safe_makedir(counts_dir)
    key = "germline_het_pon"
    het_bed = tz.get_in(["genome_resources", "variation", key], tumor)

    normal = paired.normal_data
    # Only samples that are actually present contribute variant regions.
    samples = [d for d in (tumor, normal) if d]
    variant_regions = bedutils.population_variant_regions(samples)
    cur_het_bed = bedutils.intersect_two(het_bed, variant_regions, work_dir, tumor)

    tumor_counts = _run_collect_allelic_counts(cur_het_bed, key, work_dir, tumor)
    if normal:
        normal_counts = _run_collect_allelic_counts(cur_het_bed, key, work_dir, normal)
    else:
        normal_counts = None
    return tumor_counts, normal_counts
def heterogzygote_counts(paired):
    """Provide tumor/normal counts at population heterozygote sites.

    Counts are produced by ``_run_collect_allelic_counts`` (CollectAllelicCounts)
    over the ``germline_het_pon`` genome resource, after passing it through
    ``bedutils.intersect_two`` with the population variant regions of the
    available samples. When normal counts are available, both results are
    passed through ``_filter_by_normal`` before being returned.

    Returns a ``(tumor_counts, normal_counts)`` tuple; ``normal_counts`` is
    ``None`` when ``paired.normal_data`` is falsy.
    """
    tumor = paired.tumor_data
    counts_dir = os.path.join(dd.get_work_dir(tumor), "structural", "counts")
    work_dir = utils.safe_makedir(counts_dir)
    key = "germline_het_pon"
    het_bed = tz.get_in(["genome_resources", "variation", key], tumor)

    normal = paired.normal_data
    # Only samples that are actually present contribute variant regions.
    samples = [d for d in (tumor, normal) if d]
    variant_regions = bedutils.population_variant_regions(samples)
    cur_het_bed = bedutils.intersect_two(het_bed, variant_regions, work_dir, tumor)

    tumor_counts = _run_collect_allelic_counts(cur_het_bed, key, work_dir, tumor)
    if normal:
        normal_counts = _run_collect_allelic_counts(cur_het_bed, key, work_dir, normal)
    else:
        normal_counts = None

    if not normal_counts:
        return tumor_counts, normal_counts
    tumor_counts, normal_counts = _filter_by_normal(tumor_counts, normal_counts, tumor)
    return tumor_counts, normal_counts