Example #1
0
def prepare_exclude_file(items, base_file, chrom=None):
    """Write a BED file of regions to skip during structural variant calling.

    The regions to keep are either the variant regions from `items`
    (subset via `shared.subset_variant_regions`) or, when no variant
    regions are available, the reference regions for `chrom` minus the LCR
    BED reported by `shared.get_lcr_bed`. The SV exclusion BED is also
    removed from the kept regions. The output is the full reference minus
    those kept regions.

    Returns the path `<base_file prefix>-exclude.bed`. The file is only
    written when neither it nor a `.gz` version already exists.
    """
    base_prefix = utils.splitext_plus(base_file)[0]
    out_file = "%s-exclude.bed" % base_prefix
    variant_regions = _get_variant_regions(items)
    if len(variant_regions) > 0:
        target_region = shared.subset_variant_regions(tz.first(variant_regions), chrom,
                                                      base_file, items)
    else:
        target_region = chrom
    first_item = items[0]
    with shared.bedtools_tmpdir(first_item):
        if target_region == chrom:
            # No usable variant regions: start from the reference regions instead.
            ref_base = tz.get_in(["reference", "fasta", "base"], first_item)
            keep_regions = callable.get_ref_bedtool(ref_base, first_item["config"], chrom)
            lcr_bed = shared.get_lcr_bed(items)
            if lcr_bed:
                keep_regions = keep_regions.subtract(pybedtools.BedTool(lcr_bed))
        else:
            keep_regions = pybedtools.BedTool(target_region).saveas()
        sv_exclude_bed = _get_sv_exclude_file(items)
        if sv_exclude_bed and len(keep_regions) > 0:
            keep_regions = keep_regions.subtract(sv_exclude_bed).saveas()
        already_present = utils.file_exists(out_file) or utils.file_exists(out_file + ".gz")
        if not already_present:
            with file_transaction(out_file) as tx_out_file:
                whole_genome = callable.get_ref_bedtool(
                    tz.get_in(["reference", "fasta", "base"], first_item),
                    first_item["config"])
                # The exclusion set is everything in the reference we do not want to keep.
                if len(keep_regions) > 0:
                    excluded = whole_genome.subtract(keep_regions)
                else:
                    excluded = whole_genome
                excluded.saveas(tx_out_file)
    return out_file
Example #2
0
def prepare_exclude_file(items, base_file, chrom=None):
    """Write a BED file of regions to skip during structural variant calling.

    The regions to keep start from the reference regions (restricted to
    `chrom` when given), minus the SV exclusion BED. When any sample has a
    "genome" coverage interval, the result is additionally passed through
    `shared.remove_highdepth_regions`. The output is the full reference
    minus those kept regions.

    Returns the path `<base_file prefix>-exclude[-<chrom>].bed`. Nothing is
    computed when that file, or a `.gz` version of it, already exists.
    """
    base_prefix = utils.splitext_plus(base_file)[0]
    chrom_suffix = "-%s" % chrom if chrom else ""
    out_file = "%s-exclude%s.bed" % (base_prefix, chrom_suffix)
    if utils.file_exists(out_file) or utils.file_exists(out_file + ".gz"):
        return out_file

    data = items[0]
    with shared.bedtools_tmpdir(data):
        ref_base = tz.get_in(["reference", "fasta", "base"], data)
        keep_regions = callable.get_ref_bedtool(ref_base, data["config"], chrom)
        if chrom:
            chrom_bed = shared.subset_bed_by_chrom(keep_regions.saveas().fn, chrom, data)
            keep_regions = pybedtools.BedTool(chrom_bed)
        sv_exclude_bed = _get_sv_exclude_file(items)
        if sv_exclude_bed and len(keep_regions) > 0:
            keep_regions = keep_regions.subtract(sv_exclude_bed, nonamecheck=True).saveas()
        if any(dd.get_coverage_interval(sample) == "genome" for sample in items):
            no_highdepth = shared.remove_highdepth_regions(keep_regions.saveas().fn, items)
            keep_regions = pybedtools.BedTool(no_highdepth)
        with file_transaction(data, out_file) as tx_out_file:
            whole_genome = callable.get_ref_bedtool(
                tz.get_in(["reference", "fasta", "base"], data), data["config"])
            # The exclusion set is everything in the reference we do not want to keep.
            if len(keep_regions) > 0:
                excluded = whole_genome.subtract(keep_regions, nonamecheck=True)
            else:
                excluded = whole_genome
            excluded.saveas(tx_out_file)
    return out_file
Example #3
0
def prepare_exclude_file(items, base_file, chrom=None):
    """Write a BED file of regions to skip during structural variant calling.

    The regions to keep start from the reference regions (restricted to
    `chrom` when given). The LCR BED from `shared.get_lcr_bed` (when one is
    returned) and the SV exclusion BED are subtracted, and the result is
    passed through `shared.remove_highdepth_regions`. The output is the
    full reference minus those kept regions.

    Returns the path `<base_file prefix>-exclude[-<chrom>].bed`. Nothing is
    computed when that file, or a `.gz` version of it, already exists.
    """
    base_prefix = utils.splitext_plus(base_file)[0]
    chrom_suffix = "-%s" % chrom if chrom else ""
    out_file = "%s-exclude%s.bed" % (base_prefix, chrom_suffix)
    if utils.file_exists(out_file) or utils.file_exists(out_file + ".gz"):
        return out_file

    data = items[0]
    with shared.bedtools_tmpdir(data):
        ref_base = tz.get_in(["reference", "fasta", "base"], data)
        keep_regions = callable.get_ref_bedtool(ref_base, data["config"], chrom)
        if chrom:
            chrom_bed = shared.subset_bed_by_chrom(keep_regions.saveas().fn, chrom, data)
            keep_regions = pybedtools.BedTool(chrom_bed)
        lcr_bed = shared.get_lcr_bed(items)
        if lcr_bed:
            keep_regions = keep_regions.subtract(pybedtools.BedTool(lcr_bed))
        sv_exclude_bed = _get_sv_exclude_file(items)
        if sv_exclude_bed and len(keep_regions) > 0:
            keep_regions = keep_regions.subtract(sv_exclude_bed).saveas()
        no_highdepth = shared.remove_highdepth_regions(keep_regions.saveas().fn, items)
        keep_regions = pybedtools.BedTool(no_highdepth)
        with file_transaction(data, out_file) as tx_out_file:
            whole_genome = callable.get_ref_bedtool(
                tz.get_in(["reference", "fasta", "base"], data), data["config"])
            # The exclusion set is everything in the reference we do not want to keep.
            if len(keep_regions) > 0:
                excluded = whole_genome.subtract(keep_regions)
            else:
                excluded = whole_genome
            excluded.saveas(tx_out_file)
    return out_file
Example #4
0
def prepare_exclude_file(items, base_file, chrom=None):
    """Write a BED file of regions to skip during structural variant calling.

    The regions to keep are either the variant regions from `items`
    (subset via `shared.subset_variant_regions`) or, when no variant
    regions are available, the reference regions for `chrom` minus the LCR
    BED reported by `shared.get_lcr_bed`. The SV exclusion BED is also
    removed from the kept regions. The output is the full reference minus
    those kept regions.

    Returns the path `<base_file prefix>-exclude.bed`. The file is only
    written when neither it nor a `.gz` version already exists.
    """
    base_prefix = utils.splitext_plus(base_file)[0]
    out_file = "%s-exclude.bed" % base_prefix
    variant_regions = _get_variant_regions(items)
    if len(variant_regions) > 0:
        target_region = shared.subset_variant_regions(tz.first(variant_regions), chrom,
                                                      base_file, items)
    else:
        target_region = chrom

    if target_region == chrom:
        # No usable variant regions: start from the reference regions instead.
        ref_base = tz.get_in(["reference", "fasta", "base"], items[0])
        keep_regions = callable.get_ref_bedtool(ref_base, items[0]["config"], chrom)
        lcr_bed = shared.get_lcr_bed(items)
        if lcr_bed:
            keep_regions = keep_regions.subtract(pybedtools.BedTool(lcr_bed))
    else:
        keep_regions = pybedtools.BedTool(target_region).saveas()

    sv_exclude_bed = _get_sv_exclude_file(items)
    if sv_exclude_bed and len(keep_regions) > 0:
        keep_regions = keep_regions.subtract(sv_exclude_bed).saveas()

    if utils.file_exists(out_file) or utils.file_exists(out_file + ".gz"):
        return out_file

    with file_transaction(out_file) as tx_out_file:
        whole_genome = callable.get_ref_bedtool(
            tz.get_in(["reference", "fasta", "base"], items[0]),
            items[0]["config"])
        # The exclusion set is everything in the reference we do not want to keep.
        if len(keep_regions) > 0:
            excluded = whole_genome.subtract(keep_regions)
        else:
            excluded = whole_genome
        excluded.saveas(tx_out_file)
    return out_file
Example #5
0
def main(ref_file):
    """Build a bgzipped, indexed BED of reference regions absent from the mappable set.

    Downloads `URL` into the current directory, subtracts it from the
    reference regions for `ref_file`, keeps only lines whose first column
    passes `chromhacks.is_nonalt`, labels each kept line with
    `umap_k100_mappability`, and writes the result to `OUT_FILE` before
    compressing and indexing it.

    Raises `requests.HTTPError` when the download returns an error status,
    so an error page is never treated as the mappability BED. The
    downloaded file and the intermediate `.tmp` file are removed even when
    a later step fails.
    """
    ref_bedtool = get_ref_bedtool(ref_file, {})

    mappable_file = os.path.basename(URL)
    tmp_file = OUT_FILE + ".tmp"
    try:
        r = requests.get(URL, stream=True)
        try:
            # Fail fast on 4xx/5xx instead of saving the error body as BED input.
            r.raise_for_status()
            with open(mappable_file, "wb") as f:
                shutil.copyfileobj(r.raw, f)
        finally:
            # A streamed response holds its connection open until closed.
            r.close()

        ref_bedtool.subtract(mappable_file,
                             nonamecheck=True).saveas(tmp_file)
        with open(tmp_file) as in_handle:
            with open(OUT_FILE, "w") as out_handle:
                for line in in_handle:
                    if chromhacks.is_nonalt(line.split()[0]):
                        out_handle.write("%s\tumap_k100_mappability\n" %
                                         line.strip())
        vcfutils.bgzip_and_index(OUT_FILE)
    finally:
        # Clean up intermediates whether or not the pipeline succeeded.
        for leftover in (tmp_file, mappable_file):
            if os.path.exists(leftover):
                os.remove(leftover)