def _get_target_access_files(cov_interval, data, work_dir):
    """Pick the target and access files for the given coverage interval type.

    Targets, anti-targets and access files are chosen based on analysis type:
    http://cnvkit.readthedocs.org/en/latest/nonhybrid.html

    Args:
        cov_interval: Coverage interval type. "amplicon" and "genome" reuse
            the target BED as the access file; any other value builds a
            separate access file.
        data: Sample data handed to the region and reference lookups.
        work_dir: Directory passed to the region lookup and used as the
            output directory for the merged BED.

    Returns:
        A ``(target_bed, access_file)`` tuple.
    """
    sv_regions = regions.get_sv_bed(data)
    # No configured BED or regions for SV calling: work through the fallbacks.
    if not sv_regions and cov_interval == "genome":
        # For genome calls, subset to regions within 10kb of genes
        sv_regions = regions.get_sv_bed(data, "transcripts1e4", work_dir)
        if sv_regions:
            sv_regions = shared.remove_exclude_regions(sv_regions, sv_regions, [data])
    if not sv_regions:
        # Finally, default to the defined variant regions
        sv_regions = dd.get_variant_regions(data)

    merged_bed = bedutils.merge_overlaps(sv_regions, data, out_dir=work_dir)
    # Amplicon and genome inputs use the targets themselves as the access file.
    if cov_interval in ("amplicon", "genome"):
        return merged_bed, merged_bed
    access_bed = _create_access_file(dd.get_ref_file(data), _sv_workdir(data), data)
    return merged_bed, access_bed
def _get_target_access_files(cov_interval, data, work_dir):
    """Pick the target and access files for the given coverage interval type.

    Targets, anti-targets and access files are chosen based on analysis type:
    http://cnvkit.readthedocs.org/en/latest/nonhybrid.html

    Args:
        cov_interval: Coverage interval type. "amplicon" and "genome" reuse
            the target BED as the access file; any other value builds a
            separate access file.
        data: Sample data handed to the region and reference lookups.
        work_dir: Directory passed to the region lookup and used as the
            output directory for the merged BED.

    Returns:
        A ``(target_bed, access_file)`` tuple.
    """
    sv_regions = regions.get_sv_bed(data)
    # No configured BED or regions for SV calling: work through the fallbacks.
    if not sv_regions and cov_interval == "genome":
        # For genome calls, subset to regions within 10kb of genes
        sv_regions = regions.get_sv_bed(data, "transcripts1e4", work_dir)
        if sv_regions:
            sv_regions = shared.remove_exclude_regions(sv_regions, sv_regions, [data])
    if not sv_regions:
        # Finally, default to the defined variant regions
        sv_regions = dd.get_variant_regions(data)

    merged_bed = bedutils.merge_overlaps(sv_regions, data, out_dir=work_dir)
    # Amplicon and genome inputs use the targets themselves as the access file.
    if cov_interval in ("amplicon", "genome"):
        return merged_bed, merged_bed
    access_bed = _create_access_file(dd.get_ref_file(data), _sv_workdir(data), data)
    return merged_bed, access_bed
def _run_wham(inputs, background_bams):
    """Run WHAM-BAM over a group of inputs and return the output VCF path.

    The output file is named after the first input's sample name and placed
    in that input's SV work directory. The command is not run when
    ``utils.file_exists`` reports the output as already present.

    Args:
        inputs: Sample data items; each must provide an "align_bam" entry.
            The first item supplies cores, reference file and variant regions.
        background_bams: BAM paths passed to WHAM-BAM with ``-b``; the flag
            is omitted when this is empty.

    Returns:
        Path to the ``<sample name>-wham.vcf`` output file.
    """
    first = inputs[0]
    sv_dir = _sv_workdir(first)
    out_file = os.path.join(sv_dir, "%s-wham.vcf" % dd.get_sample_name(first))
    # Collected up front so a missing "align_bam" fails before any work starts.
    bam_files = [item["align_bam"] for item in inputs]
    if utils.file_exists(out_file):
        return out_file

    with file_transaction(first, out_file) as tx_out_file:
        num_cores = dd.get_cores(first)
        background_arg = ""
        if background_bams:
            background_arg = "-b %s" % ",".join(background_bams)
        bam_arg = ",".join(bam_files)
        # Restrict calling to the configured variant regions minus exclusions.
        variant_regions = tz.get_in(["config", "algorithm", "variant_regions"], first)
        regions_bed = shared.remove_exclude_regions(variant_regions, out_file, inputs)
        reference = dd.get_ref_file(first)
        regions_arg = ""
        if regions_bed:
            regions_arg = "-e %s" % regions_bed
        cmd = ("WHAM-BAM -x {cores} -f {ref_file} -t {target_bams} "
               "{background} {target_str} > {tx_out_file}")
        do.run(cmd.format(cores=num_cores, ref_file=reference, target_bams=bam_arg,
                          background=background_arg, target_str=regions_arg,
                          tx_out_file=tx_out_file),
               "Run WHAM")
    return out_file
def _run_wham(inputs, background_bams):
    """Run WHAM-BAM over a group of inputs and return the output VCF path.

    The output file is named after the first input's sample name and placed
    in that input's SV work directory. The command is not run when
    ``utils.file_exists`` reports the output as already present.

    Args:
        inputs: Sample data items; each must provide an "align_bam" entry.
            The first item supplies cores, reference file and variant regions.
        background_bams: BAM paths passed to WHAM-BAM with ``-b``; the flag
            is omitted when this is empty.

    Returns:
        Path to the ``<sample name>-wham.vcf`` output file.
    """
    first = inputs[0]
    sv_dir = _sv_workdir(first)
    out_file = os.path.join(sv_dir, "%s-wham.vcf" % dd.get_sample_name(first))
    # Collected up front so a missing "align_bam" fails before any work starts.
    bam_files = [item["align_bam"] for item in inputs]
    if utils.file_exists(out_file):
        return out_file

    with file_transaction(first, out_file) as tx_out_file:
        num_cores = dd.get_cores(first)
        background_arg = ""
        if background_bams:
            background_arg = "-b %s" % ",".join(background_bams)
        bam_arg = ",".join(bam_files)
        # Restrict calling to the configured variant regions minus exclusions.
        variant_regions = tz.get_in(["config", "algorithm", "variant_regions"], first)
        regions_bed = shared.remove_exclude_regions(variant_regions, out_file, inputs)
        reference = dd.get_ref_file(first)
        regions_arg = ""
        if regions_bed:
            regions_arg = "-e %s" % regions_bed
        cmd = ("WHAM-BAM -x {cores} -f {ref_file} -t {target_bams} "
               "{background} {target_str} > {tx_out_file}")
        do.run(cmd.format(cores=num_cores, ref_file=reference, target_bams=bam_arg,
                          background=background_arg, target_str=regions_arg,
                          tx_out_file=tx_out_file),
               "Run WHAM")
    return out_file