def apply_genotypes_to_vcf(src_vcf, out_vcf, genotypes, sample, sum_quals):
    """Copy the variants of ``src_vcf`` to ``out_vcf``, attaching genotypes.

    The header is written first via ``src_vcf.write_header(out_vcf)``; then
    every line yielded by ``vcf_variants(src_vcf.filename)`` is parsed into a
    ``Variant`` and handled as follows:

    * records without an SVTYPE, or with an SVTYPE rejected by
      ``is_valid_svtype()``, are logged and written through without a
      genotype;
    * a BND record whose mate has not been seen yet is parked in a cache and
      nothing is written for it at that point;
    * once the mate arrives, the cached (first-seen) record is genotyped and
      written, and the later record is written right after it with the same
      ``qual``, ``active_formats`` and ``genotype``;
    * every other record is genotyped and written immediately.

    Args:
        src_vcf: Source VCF object; must provide ``write_header`` and
            ``filename`` and is handed to each ``Variant``.
        out_vcf: Output destination passed to ``write_header`` and
            ``Variant.write``.
        genotypes: Container indexed by ``variant.var_id`` that yields the
            genotype result for that variant (``None`` means "no result").
        sample: Sample object forwarded to ``assign_genotype_to_variant``.
        sum_quals: When falsy, each parsed variant's ``qual`` is reset to 0
            before genotyping.

    Raises:
        RuntimeError: If ``genotypes`` holds ``None`` for a variant that has
            to be genotyped.
    """
    # BND records still waiting for their mate, keyed by their own var_id.
    bnd_cache = {}
    src_vcf.write_header(out_vcf)

    # The record count is not needed in this function, so the file is read
    # only once here (no up-front counting pass).
    for vline in vcf_variants(src_vcf.filename):
        variant = Variant(vline.rstrip().split('\t'), src_vcf)
        if not sum_quals:
            variant.qual = 0

        if not variant.has_svtype():
            msg = ('Warning: SVTYPE missing '
                   'at variant %s. '
                   'Skipping.\n') % (variant.var_id)
            logit(msg)
            variant.write(out_vcf)
            continue

        if not variant.is_valid_svtype():
            msg = ('Warning: Unsupported SVTYPE '
                   'at variant %s (%s). '
                   'Skipping.\n') % (variant.var_id, variant.get_svtype())
            logit(msg)
            variant.write(out_vcf)
            continue

        # special BND processing
        if variant.get_svtype() == 'BND':
            mate_id = variant.info['MATEID']
            if mate_id in bnd_cache:
                # Genotype the first-seen mate; remember the current record
                # so it can receive a copy of the result further down.
                variant2 = variant
                variant = bnd_cache.pop(mate_id)
            else:
                bnd_cache[variant.var_id] = variant
                continue

        result = genotypes[variant.var_id]
        if result is None:
            msg = ("Found no genotype results for variant "
                   "'{}' ({})").format(variant.var_id, variant.get_svtype())
            logit(msg)
            raise RuntimeError(msg)

        variant = assign_genotype_to_variant(variant, sample, result)
        variant.write(out_vcf)

        # special BND processing: a BND only reaches this point when its mate
        # was found above, so variant2 is bound in this iteration.
        if variant.get_svtype() == 'BND':
            variant2.qual = variant.qual
            variant2.active_formats = variant.active_formats
            variant2.genotype = variant.genotype
            variant2.write(out_vcf)
def genotype_serial(src_vcf, out_vcf, sample, z, split_slop, min_aligned,
                    sum_quals, split_weight, disc_weight, max_reads,
                    max_ci_dist, debug):
    """Genotype every variant of ``src_vcf`` one by one and write ``out_vcf``.

    The header is written first, the records are counted for progress
    logging, and ``cleanup()`` is called on every library in
    ``sample.rg_to_lib``.  Each line yielded by
    ``vcf_variants(src_vcf.filename)`` is then parsed into a ``Variant``:

    * records without an SVTYPE, or with an SVTYPE rejected by
      ``is_valid_svtype()``, are logged and written through unchanged;
    * a BND record whose mate has not been seen yet is cached and nothing is
      written for it at that point;
    * once the mate arrives, the cached (first-seen) record is genotyped and
      written, and the later record is written right after it with the same
      ``qual``, ``active_formats`` and ``genotype``;
    * every other record is genotyped with ``serial_calculate_genotype`` and
      written.

    NOTE(review): a second ``genotype_serial`` definition follows this one in
    the file and takes precedence at import time; confirm which copy is
    meant to stay.

    Args:
        src_vcf: Source VCF object; must provide ``write_header``,
            ``filename`` and ``get_variant_breakpoints``.
        out_vcf: Output destination passed to ``write_header`` and
            ``Variant.write``.
        sample: Sample object; ``bam``, ``rg_to_lib``, ``active_libs`` and
            ``name`` are read from it.
        z: Forwarded to ``get_breakpoint_regions``.
        split_slop, min_aligned, split_weight, disc_weight, max_reads, debug:
            Forwarded unchanged to ``serial_calculate_genotype``.
        sum_quals: When falsy, each parsed variant's ``qual`` is reset to 0
            before genotyping.
        max_ci_dist: Forwarded to ``src_vcf.get_variant_breakpoints``.
    """
    # BND records still waiting for their mate, keyed by their own var_id.
    bnd_cache = {}
    src_vcf.write_header(out_vcf)
    # Only the count is needed (for the progress message), so consume the
    # records lazily instead of building a list of every line.
    total_variants = sum(1 for _ in vcf_variants(src_vcf.filename))

    # cleanup unused library attributes
    for rg in sample.rg_to_lib:
        sample.rg_to_lib[rg].cleanup()

    for i, vline in enumerate(vcf_variants(src_vcf.filename)):
        variant = Variant(vline.rstrip().split('\t'), src_vcf)
        if i % 1000 == 0:
            logit("[ {} | {} ] Processing variant {}".format(
                i, total_variants, variant.var_id))
        if not sum_quals:
            variant.qual = 0

        if not variant.has_svtype():
            msg = ('Warning: SVTYPE missing '
                   'at variant %s. '
                   'Skipping.\n') % (variant.var_id)
            logit(msg)
            variant.write(out_vcf)
            continue

        if not variant.is_valid_svtype():
            msg = ('Warning: Unsupported SVTYPE '
                   'at variant %s (%s). '
                   'Skipping.\n') % (variant.var_id, variant.get_svtype())
            logit(msg)
            variant.write(out_vcf)
            continue

        # Breakpoints are taken from the record parsed in this iteration,
        # i.e. before a BND record is swapped for its cached mate below.
        breakpoints = src_vcf.get_variant_breakpoints(variant, max_ci_dist)

        # special BND processing
        if variant.get_svtype() == 'BND':
            mate_id = variant.info['MATEID']
            if mate_id in bnd_cache:
                # Genotype the first-seen mate; remember the current record
                # so it can receive a copy of the result further down.
                variant2 = variant
                variant = bnd_cache.pop(mate_id)
            else:
                bnd_cache[variant.var_id] = variant
                continue

        if breakpoints is None:
            msg = ("Found no breakpoints for variant "
                   "'{}' ({})").format(variant.var_id, variant.get_svtype())
            logit(msg)
            # NOTE(review): nothing is written to out_vcf on this path (for a
            # BND pair, neither mate is written) -- confirm this is intended.
            continue

        result = serial_calculate_genotype(
            sample.bam,
            get_breakpoint_regions(breakpoints, sample, z),
            sample.rg_to_lib,
            sample.active_libs,
            sample.name,
            split_slop,
            min_aligned,
            split_weight,
            disc_weight,
            breakpoints,
            max_reads,
            debug
        )

        variant = assign_genotype_to_variant(variant, sample, result)
        variant.write(out_vcf)

        # special BND processing: a BND only reaches this point when its mate
        # was found above, so variant2 is bound in this iteration.
        if variant.get_svtype() == 'BND':
            variant2.qual = variant.qual
            variant2.active_formats = variant.active_formats
            variant2.genotype = variant.genotype
            variant2.write(out_vcf)
def genotype_serial(src_vcf, out_vcf, sample, z, split_slop, min_aligned,
                    sum_quals, split_weight, disc_weight, max_reads,
                    max_ci_dist, debug):
    """Genotype every variant of ``src_vcf`` one by one and write ``out_vcf``.

    The header is written first, the records are counted for progress
    logging, and ``cleanup()`` is called on every library in
    ``sample.rg_to_lib``.  Each line yielded by
    ``vcf_variants(src_vcf.filename)`` is then parsed into a ``Variant``:

    * records without an SVTYPE, or with an SVTYPE rejected by
      ``is_valid_svtype()``, are logged and written through unchanged;
    * a BND record whose mate has not been seen yet is cached and nothing is
      written for it at that point;
    * once the mate arrives, the cached (first-seen) record is genotyped and
      written, and the later record is written right after it with the same
      ``qual``, ``active_formats`` and ``genotype``;
    * every other record is genotyped with ``serial_calculate_genotype`` and
      written.

    NOTE(review): an earlier ``genotype_serial`` definition precedes this one
    in the file; this later definition is the one bound at import time.
    Confirm which copy is meant to stay.

    Args:
        src_vcf: Source VCF object; must provide ``write_header``,
            ``filename`` and ``get_variant_breakpoints``.
        out_vcf: Output destination passed to ``write_header`` and
            ``Variant.write``.
        sample: Sample object; ``bam``, ``rg_to_lib``, ``active_libs`` and
            ``name`` are read from it.
        z: Forwarded to ``get_breakpoint_regions``.
        split_slop, min_aligned, split_weight, disc_weight, max_reads, debug:
            Forwarded unchanged to ``serial_calculate_genotype``.
        sum_quals: When falsy, each parsed variant's ``qual`` is reset to 0
            before genotyping.
        max_ci_dist: Forwarded to ``src_vcf.get_variant_breakpoints``.
    """
    # BND records still waiting for their mate, keyed by their own var_id.
    bnd_cache = {}
    src_vcf.write_header(out_vcf)
    # Only the count is needed (for the progress message), so consume the
    # records lazily instead of building a list of every line.
    total_variants = sum(1 for _ in vcf_variants(src_vcf.filename))

    # cleanup unused library attributes
    for rg in sample.rg_to_lib:
        sample.rg_to_lib[rg].cleanup()

    for i, vline in enumerate(vcf_variants(src_vcf.filename)):
        variant = Variant(vline.rstrip().split('\t'), src_vcf)
        if i % 1000 == 0:
            logit("[ {} | {} ] Processing variant {}".format(
                i, total_variants, variant.var_id))
        if not sum_quals:
            variant.qual = 0

        if not variant.has_svtype():
            msg = ('Warning: SVTYPE missing '
                   'at variant %s. '
                   'Skipping.\n') % (variant.var_id)
            logit(msg)
            variant.write(out_vcf)
            continue

        if not variant.is_valid_svtype():
            msg = ('Warning: Unsupported SVTYPE '
                   'at variant %s (%s). '
                   'Skipping.\n') % (variant.var_id, variant.get_svtype())
            logit(msg)
            variant.write(out_vcf)
            continue

        # Breakpoints are taken from the record parsed in this iteration,
        # i.e. before a BND record is swapped for its cached mate below.
        breakpoints = src_vcf.get_variant_breakpoints(variant, max_ci_dist)

        # special BND processing
        if variant.get_svtype() == 'BND':
            mate_id = variant.info['MATEID']
            if mate_id in bnd_cache:
                # Genotype the first-seen mate; remember the current record
                # so it can receive a copy of the result further down.
                variant2 = variant
                variant = bnd_cache.pop(mate_id)
            else:
                bnd_cache[variant.var_id] = variant
                continue

        if breakpoints is None:
            msg = ("Found no breakpoints for variant "
                   "'{}' ({})").format(variant.var_id, variant.get_svtype())
            logit(msg)
            # NOTE(review): nothing is written to out_vcf on this path (for a
            # BND pair, neither mate is written) -- confirm this is intended.
            continue

        result = serial_calculate_genotype(
            sample.bam,
            get_breakpoint_regions(breakpoints, sample, z),
            sample.rg_to_lib,
            sample.active_libs,
            sample.name,
            split_slop,
            min_aligned,
            split_weight,
            disc_weight,
            breakpoints,
            max_reads,
            debug
        )

        variant = assign_genotype_to_variant(variant, sample, result)
        variant.write(out_vcf)

        # special BND processing: a BND only reaches this point when its mate
        # was found above, so variant2 is bound in this iteration.
        if variant.get_svtype() == 'BND':
            variant2.qual = variant.qual
            variant2.active_formats = variant.active_formats
            variant2.genotype = variant.genotype
            variant2.write(out_vcf)