Beispiel #1
0
def prepare_exclude_file(items, base_file, chrom=None):
    """Build the BED file of regions to exclude from structural variant calling.

    The regions to keep are the reference regions (restricted to `chrom` when
    one is given), passed through `shared.subset_variant_regions` and with any
    SV exclusion BED subtracted. The output is the full reference minus those
    kept regions, or the full reference when nothing is kept. This excludes
    high depth and centromere regions, which contribute to long run times and
    false positive structural variant calls.

    Nothing is recomputed when the output path, or the same path with a
    `.gz` suffix, already exists. Returns the output BED path.
    """
    items = shared.add_highdepth_genome_exclusion(items)
    base_prefix = utils.splitext_plus(base_file)[0]
    chrom_suffix = "-%s" % chrom if chrom else ""
    out_file = "%s-exclude%s.bed" % (base_prefix, chrom_suffix)
    if utils.file_exists(out_file) or utils.file_exists(out_file + ".gz"):
        return out_file

    data = items[0]
    with shared.bedtools_tmpdir(data), \
            file_transaction(data, out_file) as tx_out_file:
        ref_base = tz.get_in(["reference", "fasta", "base"], data)
        # Start from the full (optionally per-chromosome) reference, then
        # narrow it down to the variant regions of interest.
        chrom_bedtool = callable.get_ref_bedtool(ref_base, data["config"],
                                                 chrom)
        want_regions = pybedtools.BedTool(
            shared.subset_variant_regions(chrom_bedtool.saveas().fn, chrom,
                                          tx_out_file, items))
        sv_exclude_bed = _get_sv_exclude_file(items)
        if sv_exclude_bed and len(want_regions) > 0:
            want_regions = want_regions.subtract(
                sv_exclude_bed, nonamecheck=True).saveas()
        genome_bedtool = callable.get_ref_bedtool(ref_base, data["config"])
        # Everything in the genome that is not wanted becomes the exclusion.
        to_write = (genome_bedtool.subtract(want_regions, nonamecheck=True)
                    if len(want_regions) > 0 else genome_bedtool)
        to_write.saveas(tx_out_file)
    return out_file
Beispiel #2
0
def prepare_exclude_file(items, base_file, chrom=None):
    """Prepare a BED file of regions to exclude.

    Excludes high depth and centromere regions which contribute to long run
    times and false positive structural variant calls.

    The output name is derived from `base_file` with an `-exclude` suffix plus
    `-<chrom>` when `chrom` is set. If that file (or its `.gz` sibling) already
    exists it is reused. Otherwise the file is written as the full reference
    with the retained variant regions removed. Returns the output path.
    """
    items = shared.add_highdepth_genome_exclusion(items)
    stem = utils.splitext_plus(base_file)[0]
    out_file = "%s-exclude%s.bed" % (stem, "-%s" % chrom if chrom else "")
    already_built = (utils.file_exists(out_file)
                     or utils.file_exists(out_file + ".gz"))
    if already_built:
        return out_file

    sample = items[0]
    with shared.bedtools_tmpdir(sample):
        with file_transaction(sample, out_file) as tx_out_file:
            ref_base = tz.get_in(["reference", "fasta", "base"], sample)
            # Regions we want to keep: reference (per chromosome if requested)
            # limited to the variant regions.
            region_bedtool = callable.get_ref_bedtool(ref_base, sample["config"], chrom)
            subset_bed = shared.subset_variant_regions(region_bedtool.saveas().fn,
                                                       chrom, tx_out_file, items)
            keep_bedtool = pybedtools.BedTool(subset_bed)
            sv_exclude_bed = _get_sv_exclude_file(items)
            if sv_exclude_bed and len(keep_bedtool) > 0:
                keep_bedtool = keep_bedtool.subtract(sv_exclude_bed,
                                                     nonamecheck=True).saveas()
            genome_bedtool = callable.get_ref_bedtool(ref_base, sample["config"])
            if len(keep_bedtool) == 0:
                # Nothing to keep, so the whole reference is written out.
                genome_bedtool.saveas(tx_out_file)
            else:
                remainder = genome_bedtool.subtract(keep_bedtool, nonamecheck=True)
                remainder.saveas(tx_out_file)
    return out_file
Beispiel #3
0
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run FreeBayes variant calling and return the resulting call file.

    Three paths are possible after adding the high depth exclusion to `items`:
    - not a paired analysis: check for pairing problems, then run the
      standard caller;
    - paired analysis without a normal BAM: run the standard caller, passing
      the pairing information as `somatic`;
    - paired analysis with a normal BAM: run the paired tumor/normal caller.
    """
    items = shared.add_highdepth_genome_exclusion(items)

    if not is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file)

    paired = get_paired_bams(align_bams, items)
    if paired.normal_bam:
        return _run_freebayes_paired(
            [paired.tumor_bam, paired.normal_bam],
            [paired.tumor_data, paired.normal_data],
            ref_file, assoc_files, region, out_file)

    # Paired analysis with no normal BAM available.
    return _run_freebayes_caller(align_bams, items, ref_file, assoc_files,
                                 region, out_file, somatic=paired)
Beispiel #4
0
def run_vardict(align_bams, items, ref_file, assoc_files, region=None,
                out_file=None):
    """Run VarDict variant calling and return the resulting call file.

    Uses the paired caller when the inputs form a paired analysis; otherwise
    checks for pairing problems and uses the standard caller.
    """
    items = shared.add_highdepth_genome_exclusion(items)
    if vcfutils.is_paired_analysis(align_bams, items):
        caller = _run_vardict_paired
    else:
        vcfutils.check_paired_problems(items)
        caller = _run_vardict_caller
    # Both callers take the same positional arguments.
    return caller(align_bams, items, ref_file, assoc_files, region, out_file)
Beispiel #5
0
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run FreeBayes variant calling, either paired tumor/normal or germline.

    Returns the call file produced by whichever caller was used. A paired
    analysis with a normal BAM goes to the paired caller; a paired analysis
    without one goes to the standard caller with `somatic` set to the pairing
    information; anything else goes to the standard caller after a check for
    pairing problems.
    """
    items = shared.add_highdepth_genome_exclusion(items)

    if is_paired_analysis(align_bams, items):
        paired = get_paired_bams(align_bams, items)
        if not paired.normal_bam:
            # No normal BAM: fall back to the standard caller.
            return _run_freebayes_caller(align_bams, items, ref_file,
                                         assoc_files, region, out_file,
                                         somatic=paired)
        return _run_freebayes_paired([paired.tumor_bam, paired.normal_bam],
                                     [paired.tumor_data, paired.normal_data],
                                     ref_file, assoc_files, region, out_file)

    vcfutils.check_paired_problems(items)
    return _run_freebayes_caller(align_bams, items, ref_file,
                                 assoc_files, region, out_file)