Ejemplo n.º 1
0
def apply_genotypes_to_vcf(src_vcf, out_vcf, genotypes, sample, sum_quals):
    """Write the variants of ``src_vcf`` to ``out_vcf`` with genotypes applied.

    The header is written first, then every variant line yielded by
    ``vcf_variants(src_vcf.filename)`` is parsed and handled as follows:

    * A variant with a missing or unsupported SVTYPE is logged and written
      through without a genotype (its QUAL is still reset when ``sum_quals``
      is falsy).
    * A BND variant is held back until the record named by its
      ``INFO['MATEID']`` has been seen. The pair is then genotyped once,
      looked up under the first-seen mate's id, and both records are written
      with the same qual, active formats and genotype.
    * Any other variant is genotyped and written directly.

    Args:
        src_vcf: Source VCF object; provides ``write_header`` and ``filename``.
        out_vcf: Output handle handed to ``write_header`` and
            ``Variant.write``.
        genotypes: Mapping from variant id to a genotype result. It is indexed
            with ``[]``, so a missing id propagates the mapping's own lookup
            error.
        sample: Sample object forwarded to ``assign_genotype_to_variant``.
        sum_quals: When falsy, each variant's ``qual`` is reset to 0 before
            the genotype is assigned.

    Raises:
        RuntimeError: If the genotype result stored for a variant is ``None``.
    """
    # BND records whose mate has not been encountered yet, keyed by variant id.
    # NOTE(review): records still in this cache when the input ends are never
    # written -- confirm that dropping unpaired BNDs is intended.
    bnd_cache = {}
    src_vcf.write_header(out_vcf)

    # The variant count is not needed here, so the file is read only once.
    for vline in vcf_variants(src_vcf.filename):
        v = vline.rstrip().split('\t')
        variant = Variant(v, src_vcf)
        if not sum_quals:
            variant.qual = 0

        if not variant.has_svtype():
            msg = ('Warning: SVTYPE missing '
                   'at variant %s. '
                   'Skipping.\n') % (variant.var_id)
            logit(msg)
            variant.write(out_vcf)
            continue

        if not variant.is_valid_svtype():
            msg = ('Warning: Unsupported SVTYPE '
                   'at variant %s (%s). '
                   'Skipping.\n') % (variant.var_id, variant.get_svtype())
            logit(msg)
            variant.write(out_vcf)
            continue

        # special BND processing
        if variant.get_svtype() == 'BND':
            if variant.info['MATEID'] in bnd_cache:
                # Second mate of the pair: keep it as variant2 and continue
                # with the cached first mate as the primary record.
                variant2 = variant
                variant = bnd_cache[variant.info['MATEID']]
                del bnd_cache[variant.var_id]
            else:
                # First mate: park it until its partner shows up.
                bnd_cache[variant.var_id] = variant
                continue

        result = genotypes[variant.var_id]

        if result is None:
            msg = ("Found no genotype results for variant "
                   "'{}' ({})").format(variant.var_id, variant.get_svtype())
            logit(msg)
            raise RuntimeError(msg)

        variant = assign_genotype_to_variant(variant, sample, result)
        variant.write(out_vcf)

        # special BND processing
        if variant.get_svtype() == 'BND':
            # Mirror the genotype onto the second mate so both records agree.
            variant2.qual = variant.qual
            variant2.active_formats = variant.active_formats
            variant2.genotype = variant.genotype
            variant2.write(out_vcf)
Ejemplo n.º 2
0
def apply_genotypes_to_vcf(src_vcf, out_vcf, genotypes, sample, sum_quals):
    """Copy ``src_vcf`` to ``out_vcf``, attaching precomputed genotypes.

    After writing the header, each variant line from
    ``vcf_variants(src_vcf.filename)`` is parsed into a ``Variant``:

    * Missing or unsupported SVTYPE: a warning is logged and the record is
      written without a genotype.
    * BND: the record is cached until the record referenced by its
      ``INFO['MATEID']`` appears; the first-seen mate is then genotyped and
      its qual, active formats and genotype are copied onto the second mate
      before both are written.
    * Anything else: genotyped and written immediately.

    Args:
        src_vcf: Source VCF object exposing ``write_header`` and ``filename``.
        out_vcf: Destination passed to ``write_header`` and ``Variant.write``.
        genotypes: Mapping of variant id to genotype result, indexed with
            ``[]`` (a missing id raises whatever the mapping raises).
        sample: Sample forwarded to ``assign_genotype_to_variant``.
        sum_quals: If falsy, every variant's ``qual`` is zeroed before the
            genotype is assigned.

    Raises:
        RuntimeError: If a variant's stored genotype result is ``None``.
    """
    # Unpaired BND records keyed by their own variant id.
    # NOTE(review): anything left in here at end of input is silently not
    # written -- verify this is the desired handling of orphan BNDs.
    bnd_cache = {}
    src_vcf.write_header(out_vcf)

    # No progress counter is reported by this function, so the input is
    # streamed in a single pass instead of being pre-counted.
    for vline in vcf_variants(src_vcf.filename):
        variant = Variant(vline.rstrip().split('\t'), src_vcf)
        if not sum_quals:
            variant.qual = 0

        if not variant.has_svtype():
            msg = ('Warning: SVTYPE missing '
                   'at variant %s. '
                   'Skipping.\n') % (variant.var_id)
            logit(msg)
            variant.write(out_vcf)
            continue

        if not variant.is_valid_svtype():
            msg = ('Warning: Unsupported SVTYPE '
                   'at variant %s (%s). '
                   'Skipping.\n') % (variant.var_id, variant.get_svtype())
            logit(msg)
            variant.write(out_vcf)
            continue

        # special BND processing
        if variant.get_svtype() == 'BND':
            if variant.info['MATEID'] in bnd_cache:
                # The partner was seen earlier: it becomes the primary record
                # and the current line is kept as the secondary one.
                variant2 = variant
                variant = bnd_cache[variant.info['MATEID']]
                del bnd_cache[variant.var_id]
            else:
                # Partner not seen yet: defer this record.
                bnd_cache[variant.var_id] = variant
                continue

        result = genotypes[variant.var_id]

        if result is None:
            msg = ("Found no genotype results for variant "
                   "'{}' ({})").format(variant.var_id, variant.get_svtype())
            logit(msg)
            raise RuntimeError(msg)

        variant = assign_genotype_to_variant(variant, sample, result)
        variant.write(out_vcf)

        # special BND processing
        if variant.get_svtype() == 'BND':
            # Both mates of a BND pair carry the same genotype call.
            variant2.qual = variant.qual
            variant2.active_formats = variant.active_formats
            variant2.genotype = variant.genotype
            variant2.write(out_vcf)
Ejemplo n.º 3
0
def collect_breakpoints(vcf, max_ci_dist):
    """Gather the breakpoints of every usable variant in ``vcf``.

    Each line yielded by ``vcf_variants(vcf.filename)`` is parsed into a
    ``Variant``. Variants lacking an SVTYPE, or carrying one that
    ``is_valid_svtype`` rejects, are skipped, as are variants for which
    ``vcf.get_variant_breakpoints`` returns ``None``.

    Args:
        vcf: VCF object providing ``filename`` and ``get_variant_breakpoints``.
        max_ci_dist: Forwarded unchanged to ``get_variant_breakpoints``.

    Returns:
        A list of the non-``None`` breakpoint results, in file order.
    """
    collected = []
    for record in vcf_variants(vcf.filename):
        fields = record.rstrip().split('\t')
        candidate = Variant(fields, vcf)
        # Only variants with a present, supported SVTYPE are looked up.
        if candidate.has_svtype() and candidate.is_valid_svtype():
            found = vcf.get_variant_breakpoints(candidate, max_ci_dist)
            if found is not None:
                collected.append(found)
    return collected
Ejemplo n.º 4
0
def collect_breakpoints(vcf, max_ci_dist):
    """Return the breakpoints for all variants of ``vcf`` that have them.

    Lines from ``vcf_variants(vcf.filename)`` are parsed into ``Variant``
    objects; those without an SVTYPE or with an SVTYPE rejected by
    ``is_valid_svtype`` are ignored. For the rest,
    ``vcf.get_variant_breakpoints(variant, max_ci_dist)`` is called and
    ``None`` results are discarded.

    Args:
        vcf: VCF object providing ``filename`` and ``get_variant_breakpoints``.
        max_ci_dist: Passed straight through to ``get_variant_breakpoints``.

    Returns:
        List of non-``None`` breakpoint results in input order.
    """
    def _is_usable(entry):
        # Same short-circuit order as two consecutive guard clauses.
        return entry.has_svtype() and entry.is_valid_svtype()

    # The generators are lazy, so each line is parsed, filtered and looked
    # up before the next line is read.
    parsed = (Variant(row.rstrip().split('\t'), vcf)
              for row in vcf_variants(vcf.filename))
    located = (vcf.get_variant_breakpoints(entry, max_ci_dist)
               for entry in parsed if _is_usable(entry))
    return [hit for hit in located if hit is not None]
Ejemplo n.º 5
0
def genotype_serial(src_vcf, out_vcf, sample, z, split_slop, min_aligned,
                    sum_quals, split_weight, disc_weight, max_reads,
                    max_ci_dist, debug):
    """Genotype the variants of ``src_vcf`` one at a time into ``out_vcf``.

    The header is written, ``cleanup()`` is called on every library in
    ``sample.rg_to_lib``, and then each variant line from
    ``vcf_variants(src_vcf.filename)`` is processed:

    * Missing or unsupported SVTYPE: a warning is logged and the record is
      written without a genotype.
    * BND: the record is cached until the record named by its
      ``INFO['MATEID']`` is read; the pair is then genotyped once and both
      mates are written with the same qual, active formats and genotype.
    * No breakpoints (``get_variant_breakpoints`` returned ``None``): a
      message is logged and the record is NOT written.
    * Otherwise the genotype is computed with ``serial_calculate_genotype``
      and the record is written.

    Args:
        src_vcf: Source VCF object; provides ``write_header``, ``filename``
            and ``get_variant_breakpoints``.
        out_vcf: Output handle passed to ``write_header`` / ``Variant.write``.
        sample: Sample object; ``bam``, ``rg_to_lib``, ``active_libs`` and
            ``name`` are read from it.
        z: Forwarded to ``get_breakpoint_regions``.
        split_slop, min_aligned, split_weight, disc_weight, max_reads, debug:
            Forwarded unchanged to ``serial_calculate_genotype``.
        sum_quals: When falsy, each variant's ``qual`` is reset to 0 before
            genotyping.
        max_ci_dist: Forwarded to ``src_vcf.get_variant_breakpoints``.
    """
    # initializations
    # BND records still waiting for their mate, keyed by variant id.
    bnd_cache = {}
    src_vcf.write_header(out_vcf)
    # Needed only for the progress message; count lazily so the whole file
    # is never held in memory as a list of lines.
    total_variants = sum(1 for _ in vcf_variants(src_vcf.filename))

    # cleanup unused library attributes
    for rg in sample.rg_to_lib:
        sample.rg_to_lib[rg].cleanup()

    for i, vline in enumerate(vcf_variants(src_vcf.filename)):
        v = vline.rstrip().split('\t')
        variant = Variant(v, src_vcf)
        if i % 1000 == 0:
            logit("[ {} | {} ] Processing variant {}".format(
                i, total_variants, variant.var_id))
        if not sum_quals:
            variant.qual = 0

        if not variant.has_svtype():
            msg = ('Warning: SVTYPE missing '
                   'at variant %s. '
                   'Skipping.\n') % (variant.var_id)
            logit(msg)
            variant.write(out_vcf)
            continue

        if not variant.is_valid_svtype():
            msg = ('Warning: Unsupported SVTYPE '
                   'at variant %s (%s). '
                   'Skipping.\n') % (variant.var_id, variant.get_svtype())
            logit(msg)
            variant.write(out_vcf)
            continue

        # Computed from the record just read. For a completed BND pair this
        # is the second-seen mate, even though `variant` is rebound to the
        # first-seen mate below.
        breakpoints = src_vcf.get_variant_breakpoints(variant, max_ci_dist)

        # special BND processing
        if variant.get_svtype() == 'BND':
            if variant.info['MATEID'] in bnd_cache:
                variant2 = variant
                variant = bnd_cache[variant.info['MATEID']]
                del bnd_cache[variant.var_id]
            else:
                bnd_cache[variant.var_id] = variant
                continue

        if breakpoints is None:
            # The record is dropped from the output here; for a BND pair the
            # cached mate has already been removed, so neither mate is written.
            msg = ("Found no breakpoints for variant "
                   "'{}' ({})").format(variant.var_id, variant.get_svtype())
            logit(msg)
            continue

        result = serial_calculate_genotype(
            sample.bam, get_breakpoint_regions(breakpoints, sample,
                                               z), sample.rg_to_lib,
            sample.active_libs, sample.name, split_slop, min_aligned,
            split_weight, disc_weight, breakpoints, max_reads, debug)

        variant = assign_genotype_to_variant(variant, sample, result)
        variant.write(out_vcf)

        # special BND processing
        if variant.get_svtype() == 'BND':
            # Copy the call onto the second mate so both records agree.
            variant2.qual = variant.qual
            variant2.active_formats = variant.active_formats
            variant2.genotype = variant.genotype
            variant2.write(out_vcf)
Ejemplo n.º 6
0
def genotype_serial(src_vcf, out_vcf, sample, z, split_slop, min_aligned,
                    sum_quals, split_weight, disc_weight, max_reads,
                    max_ci_dist, debug):
    """Serially genotype every variant in ``src_vcf`` and write to ``out_vcf``.

    Steps: write the header, call ``cleanup()`` on each library in
    ``sample.rg_to_lib``, then walk the variant lines from
    ``vcf_variants(src_vcf.filename)``:

    * Records with a missing or unsupported SVTYPE are logged and written
      without a genotype.
    * BND records are cached until the record referenced by their
      ``INFO['MATEID']`` arrives; the pair is genotyped once and the second
      mate receives a copy of the first mate's qual, active formats and
      genotype.
    * Records for which ``get_variant_breakpoints`` returns ``None`` are
      logged and omitted from the output.
    * All other records are genotyped via ``serial_calculate_genotype`` and
      written.

    Args:
        src_vcf: Source VCF object exposing ``write_header``, ``filename``
            and ``get_variant_breakpoints``.
        out_vcf: Destination passed to ``write_header`` / ``Variant.write``.
        sample: Sample object whose ``bam``, ``rg_to_lib``, ``active_libs``
            and ``name`` attributes are used.
        z: Passed to ``get_breakpoint_regions``.
        split_slop, min_aligned, split_weight, disc_weight, max_reads, debug:
            Passed through to ``serial_calculate_genotype``.
        sum_quals: If falsy, each variant's ``qual`` is zeroed first.
        max_ci_dist: Passed to ``src_vcf.get_variant_breakpoints``.
    """
    # initializations
    # First-seen BND mates awaiting their partner, keyed by variant id.
    bnd_cache = {}
    src_vcf.write_header(out_vcf)
    # Only used in the progress log line. A generator count avoids building
    # a list of every variant line just to take its length.
    total_variants = sum(1 for _ in vcf_variants(src_vcf.filename))

    # cleanup unused library attributes
    for rg in sample.rg_to_lib:
        sample.rg_to_lib[rg].cleanup()

    for i, vline in enumerate(vcf_variants(src_vcf.filename)):
        v = vline.rstrip().split('\t')
        variant = Variant(v, src_vcf)
        if i % 1000 == 0:
            logit("[ {} | {} ] Processing variant {}".format(
                i, total_variants, variant.var_id))
        if not sum_quals:
            variant.qual = 0

        if not variant.has_svtype():
            msg = ('Warning: SVTYPE missing '
                   'at variant %s. '
                   'Skipping.\n') % (variant.var_id)
            logit(msg)
            variant.write(out_vcf)
            continue

        if not variant.is_valid_svtype():
            msg = ('Warning: Unsupported SVTYPE '
                   'at variant %s (%s). '
                   'Skipping.\n') % (variant.var_id, variant.get_svtype())
            logit(msg)
            variant.write(out_vcf)
            continue

        # Breakpoints belong to the line just read; after the BND swap below
        # `variant` refers to the earlier mate while these stay unchanged.
        breakpoints = src_vcf.get_variant_breakpoints(variant, max_ci_dist)

        # special BND processing
        if variant.get_svtype() == 'BND':
            if variant.info['MATEID'] in bnd_cache:
                variant2 = variant
                variant = bnd_cache[variant.info['MATEID']]
                del bnd_cache[variant.var_id]
            else:
                bnd_cache[variant.var_id] = variant
                continue

        if breakpoints is None:
            # Nothing is written for this record (nor for its BND mate, which
            # was already removed from the cache above).
            msg = ("Found no breakpoints for variant "
                   "'{}' ({})").format(variant.var_id, variant.get_svtype())
            logit(msg)
            continue

        result = serial_calculate_genotype(
            sample.bam,
            get_breakpoint_regions(breakpoints, sample, z),
            sample.rg_to_lib,
            sample.active_libs,
            sample.name,
            split_slop,
            min_aligned,
            split_weight,
            disc_weight,
            breakpoints,
            max_reads,
            debug,
        )

        variant = assign_genotype_to_variant(variant, sample, result)
        variant.write(out_vcf)

        # special BND processing
        if variant.get_svtype() == 'BND':
            # Keep both mates of the pair in sync with the same call.
            variant2.qual = variant.qual
            variant2.active_formats = variant.active_formats
            variant2.genotype = variant.genotype
            variant2.write(out_vcf)