def _create_subset_file(in_file, het_region_bed, work_dir, data): """Subset the VCF to a set of pre-calculated smaller regions. """ cnv_regions = shared.get_base_cnv_regions(data, work_dir) region_bed = bedutils.intersect_two(het_region_bed, cnv_regions, work_dir, data) out_file = os.path.join(work_dir, "%s-origsubset.bcf" % utils.splitext_plus(os.path.basename(in_file))[0]) if not utils.file_uptodate(out_file, in_file): with file_transaction(data, out_file) as tx_out_file: regions = ("-R %s" % region_bed) if utils.file_exists(region_bed) else "" cmd = "bcftools view {regions} -o {tx_out_file} -O b {in_file}" do.run(cmd.format(**locals()), "Extract regions for BubbleTree frequency determination") return out_file
def _get_target_access_files(cov_interval, data, work_dir): """Retrieve target and access files based on the type of data to process. pick targets, anti-targets and access files based on analysis type http://cnvkit.readthedocs.org/en/latest/nonhybrid.html """ base_regions = shared.get_base_cnv_regions(data, work_dir) target_bed = bedutils.merge_overlaps(base_regions, data, out_dir=work_dir) if cov_interval == "amplicon": return target_bed, target_bed elif cov_interval == "genome": return target_bed, target_bed else: access_file = _create_access_file(dd.get_ref_file(data), _sv_workdir(data), data) return target_bed, access_file
def _get_target_access_files(cov_interval, data, work_dir): """Retrieve target and access files based on the type of data to process. pick targets, anti-targets and access files based on analysis type http://cnvkit.readthedocs.org/en/latest/nonhybrid.html """ base_regions = shared.get_base_cnv_regions(data, work_dir) target_bed = bedutils.sort_merge(base_regions, data, out_dir=work_dir) if cov_interval == "amplicon": return target_bed, target_bed elif cov_interval == "genome": return target_bed, target_bed else: access_file = _create_access_file(dd.get_ref_file(data), _sv_workdir(data), data) return target_bed, access_file