def prepare_exclude_file(items, base_file, chrom=None):
    """Build the BED file of regions to exclude from structural variant calling.

    The exclusion set is computed as the full reference minus the regions we
    want to call in. High depth and centromere regions are excluded because
    they contribute to long run times and false positive structural variant
    calls.

    items -- list of sample data dictionaries; the first one supplies the
             configuration and reference lookup.
    base_file -- path used to derive the output name
                 (``<base>-exclude[-<chrom>].bed``).
    chrom -- optional chromosome name; when given it is appended to the output
             name and passed through to the region lookups.

    Returns the path to the exclusion BED file. Nothing is recomputed when that
    file, or a ``.gz`` version of it, already exists.
    """
    items = shared.add_highdepth_genome_exclusion(items)
    base_name = utils.splitext_plus(base_file)[0]
    chrom_suffix = "-%s" % chrom if chrom else ""
    out_file = "%s-exclude%s.bed" % (base_name, chrom_suffix)
    # Reuse a previous result, whether plain or gzipped.
    if utils.file_exists(out_file) or utils.file_exists(out_file + ".gz"):
        return out_file

    data = items[0]
    with shared.bedtools_tmpdir(data), file_transaction(data, out_file) as tx_out_file:
        ref_base = tz.get_in(["reference", "fasta", "base"], data)
        # Start from the full reference (optionally limited to chrom) so there
        # is always a bedtool to work with, even without variant regions.
        chrom_bedtool = callable.get_ref_bedtool(ref_base, data["config"], chrom)
        # NOTE(review): presumably restricts to the configured variant regions
        # when present -- confirm against shared.subset_variant_regions.
        want_regions = shared.subset_variant_regions(
            chrom_bedtool.saveas().fn, chrom, tx_out_file, items)
        want_bedtool = pybedtools.BedTool(want_regions)
        sv_exclude_bed = _get_sv_exclude_file(items)
        if sv_exclude_bed and len(want_bedtool) > 0:
            # Drop the known problematic regions from what we want to call in.
            want_bedtool = want_bedtool.subtract(sv_exclude_bed, nonamecheck=True).saveas()
        exclude_bedtool = callable.get_ref_bedtool(ref_base, data["config"])
        if len(want_bedtool) > 0:
            # Everything in the reference that is not wanted gets excluded.
            exclude_bedtool = exclude_bedtool.subtract(want_bedtool, nonamecheck=True)
        exclude_bedtool.saveas(tx_out_file)
    return out_file
def prepare_exclude_file(items, base_file, chrom=None):
    """Build the BED file of regions to exclude from structural variant calling.

    The exclusion set is computed as the full reference minus the regions we
    want to call in. High depth and centromere regions are excluded because
    they contribute to long run times and false positive structural variant
    calls.

    items -- list of sample data dictionaries; the first one supplies the
             configuration and reference lookup.
    base_file -- path used to derive the output name
                 (``<base>-exclude[-<chrom>].bed``).
    chrom -- optional chromosome name; when given it is appended to the output
             name and passed through to the region lookups.

    Returns the path to the exclusion BED file. Nothing is recomputed when that
    file, or a ``.gz`` version of it, already exists.
    """
    items = shared.add_highdepth_genome_exclusion(items)
    base_name = utils.splitext_plus(base_file)[0]
    chrom_suffix = "-%s" % chrom if chrom else ""
    out_file = "%s-exclude%s.bed" % (base_name, chrom_suffix)
    # Reuse a previous result, whether plain or gzipped.
    if utils.file_exists(out_file) or utils.file_exists(out_file + ".gz"):
        return out_file

    data = items[0]
    with shared.bedtools_tmpdir(data), file_transaction(data, out_file) as tx_out_file:
        ref_base = tz.get_in(["reference", "fasta", "base"], data)
        # Start from the full reference (optionally limited to chrom) so there
        # is always a bedtool to work with, even without variant regions.
        chrom_bedtool = callable.get_ref_bedtool(ref_base, data["config"], chrom)
        # NOTE(review): presumably restricts to the configured variant regions
        # when present -- confirm against shared.subset_variant_regions.
        want_regions = shared.subset_variant_regions(
            chrom_bedtool.saveas().fn, chrom, tx_out_file, items)
        want_bedtool = pybedtools.BedTool(want_regions)
        sv_exclude_bed = _get_sv_exclude_file(items)
        if sv_exclude_bed and len(want_bedtool) > 0:
            # Drop the known problematic regions from what we want to call in.
            want_bedtool = want_bedtool.subtract(sv_exclude_bed, nonamecheck=True).saveas()
        exclude_bedtool = callable.get_ref_bedtool(ref_base, data["config"])
        if len(want_bedtool) > 0:
            # Everything in the reference that is not wanted gets excluded.
            exclude_bedtool = exclude_bedtool.subtract(want_bedtool, nonamecheck=True)
        exclude_bedtool.saveas(tx_out_file)
    return out_file
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None, out_file=None):
    """Call variants with FreeBayes, dispatching on the analysis type.

    Three paths are possible after adding the high depth exclusion to items:
    - not a paired analysis: check for pairing problems, then run the
      standard caller;
    - paired analysis without a normal BAM: run the standard caller, handing
      it the pairing information through ``somatic``;
    - paired analysis with a normal BAM: run the paired caller on the
      tumor/normal BAMs and their data.

    Returns whatever the selected caller returns.
    """
    items = shared.add_highdepth_genome_exclusion(items)

    if not is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file)

    paired = get_paired_bams(align_bams, items)
    if paired.normal_bam:
        pair_bams = [paired.tumor_bam, paired.normal_bam]
        pair_data = [paired.tumor_data, paired.normal_data]
        return _run_freebayes_paired(pair_bams, pair_data, ref_file,
                                     assoc_files, region, out_file)

    # Paired setup but no normal sample available.
    return _run_freebayes_caller(align_bams, items, ref_file,
                                 assoc_files, region, out_file, somatic=paired)
def run_vardict(align_bams, items, ref_file, assoc_files, region=None, out_file=None):
    """Call variants with VarDict, using the paired or standard caller.

    After adding the high depth exclusion to items, a paired analysis goes to
    the paired caller. Anything else is first checked for pairing problems and
    then handed to the standard caller.

    Returns whatever the selected caller returns.
    """
    items = shared.add_highdepth_genome_exclusion(items)

    if vcfutils.is_paired_analysis(align_bams, items):
        return _run_vardict_paired(align_bams, items, ref_file,
                                   assoc_files, region, out_file)

    vcfutils.check_paired_problems(items)
    return _run_vardict_caller(align_bams, items, ref_file,
                               assoc_files, region, out_file)
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None, out_file=None):
    """Call variants with FreeBayes, dispatching on the analysis type.

    Three paths are possible after adding the high depth exclusion to items:
    - not a paired analysis: check for pairing problems, then run the
      standard caller;
    - paired analysis without a normal BAM: run the standard caller, handing
      it the pairing information through ``somatic``;
    - paired analysis with a normal BAM: run the paired caller on the
      tumor/normal BAMs and their data.

    Returns whatever the selected caller returns.
    """
    items = shared.add_highdepth_genome_exclusion(items)

    if not is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file)

    paired = get_paired_bams(align_bams, items)
    if paired.normal_bam:
        pair_bams = [paired.tumor_bam, paired.normal_bam]
        pair_data = [paired.tumor_data, paired.normal_data]
        return _run_freebayes_paired(pair_bams, pair_data, ref_file,
                                     assoc_files, region, out_file)

    # Paired setup but no normal sample available.
    return _run_freebayes_caller(align_bams, items, ref_file,
                                 assoc_files, region, out_file, somatic=paired)