def _parse_seq_panel(self, row):
    """Record the coverage of one sequence-panel probe in ``self.covgs``.

    ``row`` is handed to ``self._parse_summary_covgs_row``, which must
    return ``(allele, median_depth, min_depth, percent_coverage, k_count,
    klen)``. The probe parameters are read from the allele string with
    ``get_params``: ``panel_type`` (default ``"presence"``), ``name``,
    ``version`` (default ``"1"``) and ``length``.

    * ``"variant"`` and ``"presence"`` panels store a
      ``SequenceProbeCoverage`` at ``self.covgs[panel_type][name][version]``.
    * Every other panel type (species panels) accumulates per-name
      statistics: ``total_bases`` sums the length of every probe seen, while
      ``percent_coverage``, ``length`` and ``median`` only collect probes
      whose ``percent_coverage`` is above 75 and whose median depth is
      positive.

    Missing ``panel_type`` / ``name`` buckets are created on demand in both
    branches.
    """
    (
        allele,
        median_depth,
        min_depth,
        percent_coverage,
        k_count,
        klen,
    ) = self._parse_summary_covgs_row(row)
    probe_coverage = ProbeCoverage(
        percent_coverage=percent_coverage,
        median_depth=median_depth,
        min_depth=min_depth,
        k_count=k_count,
        klen=klen,
    )
    params = get_params(allele)
    panel_type = params.get("panel_type", "presence")
    name = params.get("name")
    version = params.get("version", "1")

    if panel_type in ["variant", "presence"]:
        sequence_probe_coverage = SequenceProbeCoverage(
            name=name,
            probe_coverage=probe_coverage,
            version=version,
            length=params.get("length"),
        )
        # setdefault creates the panel_type and name buckets as needed, so a
        # panel type that has not been seen before no longer raises KeyError.
        versions = self.covgs.setdefault(panel_type, {}).setdefault(name, {})
        versions[version] = sequence_probe_coverage
        return

    # Species panels are treated differently: only aggregate statistics are
    # kept per name instead of one coverage object per version.
    length = int(params.get("length", -1))
    panel = self.covgs.setdefault(panel_type, {})
    stats = panel.get(name)
    if stats is None:
        stats = panel[name] = {
            "total_bases": 0,
            "percent_coverage": [],
            "length": [],
            "median": [],
        }
    stats["total_bases"] += length
    if percent_coverage > 75 and median_depth > 0:
        stats["percent_coverage"].append(percent_coverage)
        stats["length"].append(length)
        stats["median"].append(median_depth)
def _parse_variant_panel(self, row):
    """Parse one variant's group of probe rows into a ``VariantProbeCoverage``.

    ``row`` is the first probe of the group and is always recorded as a
    reference coverage. The rest of the group is pulled from
    ``self.reader``: up to ``num_alts - 1`` further reference probes,
    followed by ``num_alts`` alternate probes, where ``num_alts`` comes from
    the probe parameters (default 0).

    If a non-``ref`` row turns up while reference probes are still expected,
    a warning is logged, that row is taken as the first alternate probe and
    one fewer alternate row is read afterwards.

    The resulting object is appended to ``self.variant_covgs[allele]``,
    keyed by the full name of the first probe.

    Raises:
        AssertionError: if a row expected to be an ``alt`` probe is not one.
        StopIteration: if ``self.reader`` is exhausted in mid-group.
    """
    allele, median_depth, min_depth, percent_coverage, k_count = (
        self._parse_summary_covgs_row(row)
    )
    params = get_params(allele)
    if "var_name" in params:
        var_name = params.get("var_name")
    else:
        # Fall back to the second dash-separated field of the probe name,
        # ignoring the "?key=value" parameter suffix.
        var_name = allele.split("?")[0].split("-")[1]
    num_alts = int(params.get("num_alts", 0))

    reference_coverages = [
        ProbeCoverage(
            percent_coverage=percent_coverage,
            median_depth=median_depth,
            min_depth=min_depth,
            k_count=k_count,
        )
    ]
    alternate_coverages = []

    for _ in range(num_alts - 1):
        ref_allele, median_depth, min_depth, percent_coverage, k_count = (
            self._parse_summary_covgs_row(next(self.reader))
        )
        coverage = ProbeCoverage(
            percent_coverage=percent_coverage,
            median_depth=median_depth,
            min_depth=min_depth,
            k_count=k_count,
        )
        if ref_allele.split("-")[0] != "ref":
            logger.warning(
                "Fewer ref alleles than alt alleles for %s", ref_allele
            )
            # This row has already been consumed from the reader, so count
            # it as the first alternate probe.
            alternate_coverages.append(coverage)
            num_alts -= 1
            break
        reference_coverages.append(coverage)

    for _ in range(num_alts):
        alt_allele, median_depth, min_depth, percent_coverage, k_count = (
            self._parse_summary_covgs_row(next(self.reader))
        )
        # NOTE: this check disappears under ``python -O``.
        assert alt_allele.split("-")[0] == "alt"
        alternate_coverages.append(
            ProbeCoverage(
                min_depth=min_depth,
                k_count=k_count,
                percent_coverage=percent_coverage,
                median_depth=median_depth,
            )
        )

    variant_probe_coverage = VariantProbeCoverage(
        reference_coverages=reference_coverages,
        alternate_coverages=alternate_coverages,
        var_name=var_name,
        params=params,
    )
    self.variant_covgs.setdefault(allele, []).append(variant_probe_coverage)
def _name_to_id(self, probe_name):
    """Build an identifier string for a probe from its name.

    When the probe parameters contain a truthy ``mut``, the identifier is
    ``"<gene>_<mut>-<var_name>"``, built from the ``gene``, ``mut`` and
    ``var_name`` parameters. Otherwise it is the second dash-separated
    field of the probe name, with any ``?...`` parameter suffix removed.
    """
    probe_params = get_params(probe_name)
    mutation = probe_params.get("mut")
    if not mutation:
        return probe_name.split("?")[0].split("-")[1]
    gene_and_mutation = "_".join((probe_params.get("gene"), mutation))
    return "-".join((gene_and_mutation, probe_params.get("var_name")))
def _get_names(self, allele_name):
    """Return the list of names that an allele string gives rise to.

    The list holds ``"<gene>_<mut>"`` first when the allele parameters carry
    a truthy ``mut``, followed by one field of the allele name (taken before
    any ``?`` suffix): the second dash-separated field when there is one,
    otherwise the first.
    """
    allele_params = get_params(allele_name)
    mutation = allele_params.get("mut")
    collected = []
    if mutation:
        collected.append("_".join((allele_params.get("gene"), mutation)))
    fields = allele_name.split("?")[0].split("-")
    collected.append(fields[1] if len(fields) > 1 else fields[0])
    return collected
def _update_lineage_calls_dict(self, probe_name, call):
    """Store ``call`` under the lineage that the probe's variant belongs to.

    Nothing happens when ``self.lineage_variants`` is ``None``, when the
    probe parameters have no ``var_name``, or when that variant is not a key
    of ``self.lineage_variants``. Otherwise the call is written to
    ``self.lineage_calls_dict[<lineage name>][<var_name>]``, creating the
    per-lineage dict on first use.
    """
    if self.lineage_variants is None:
        return

    probe_params = get_params(probe_name)
    try:
        variant = probe_params["var_name"]
        lineage_info = self.lineage_variants[variant]
    except KeyError:
        # Either the probe has no var_name or it is not a lineage variant.
        return

    calls_for_lineage = self.lineage_calls_dict.setdefault(
        lineage_info["name"], {}
    )
    calls_for_lineage[variant] = call
def _create_variant(self, probe_name):
    """Create a ``Variant`` from a probe name, or return ``None``.

    The variant name is the second dash-separated field of the probe name
    (before any ``?`` suffix). It is split into reference bases, start and
    alternate bases with ``split_var_name``. The variant's ``names`` are
    ``"<gene>_<mut>"`` (only when the probe parameters carry a truthy
    ``mut``) followed by the variant name, and its ``info`` is the full
    parameter dict.

    Returns ``None`` when splitting the name or creating the variant raises
    ``AttributeError`` (presumably a name that is not in variant form;
    confirm against ``split_var_name``).
    """
    probe_params = get_params(probe_name)
    mutation = probe_params.get("mut")
    variant_names = []
    if mutation:
        variant_names.append("_".join((probe_params.get("gene"), mutation)))
    variant_id = probe_name.split("?")[0].split("-")[1]
    variant_names.append(variant_id)

    try:
        # If it's a variant panel we can create a variant
        ref_bases, position, alt_bases = split_var_name(variant_id)
        variant = Variant.create(
            start=position,
            reference_bases=ref_bases,
            alternate_bases=[alt_bases],
            names=variant_names,
            info=probe_params,
        )
    except AttributeError:
        variant = None
    return variant
def _parse_variant_panel(self, row):
    """Add one variant-panel probe row to ``self.variant_covgs``.

    ``row`` is handed to ``self._parse_summary_covgs_row``, which must
    return ``(probe, median_depth, min_depth, percent_coverage, k_count,
    klen)``. Probes are grouped under a key that is
    ``"<gene>_<mut>-<var_name>"`` when the probe parameters contain
    ``var_name`` (a missing ``gene`` or ``mut`` becomes an empty string), and
    otherwise the second dash-separated field of the probe name.

    The first probe seen for a key creates an empty
    ``VariantProbeCoverage``. The row's ``ProbeCoverage`` is then appended
    to its reference or alternate coverages, according to the probe name's
    prefix, and the matching "best" coverage is recomputed.

    Raises:
        ValueError: if the text before the first ``-`` of the probe name is
            neither ``ref`` nor ``alt``.
    """
    (
        probe,
        median_depth,
        min_depth,
        percent_coverage,
        k_count,
        klen,
    ) = self._parse_summary_covgs_row(row)
    params = get_params(probe)
    probe_type = probe.split("-")[0]
    if "var_name" in params:
        var_name = (
            params.get("gene", "")
            + "_"
            + params.get("mut", "")
            + "-"
            + params.get("var_name", "")
        )
    else:
        # Use ``probe`` here: the old code read an undefined name ``allele``,
        # so this branch always failed with NameError.
        var_name = probe.split("?")[0].split("-")[1]

    if var_name not in self.variant_covgs:
        # NOTE(review): the object gets the full probe string as its
        # var_name rather than the grouping key; confirm this is intended.
        self.variant_covgs[var_name] = VariantProbeCoverage(
            reference_coverages=[],
            alternate_coverages=[],
            var_name=probe,
            params=params,
        )

    probe_coverage = ProbeCoverage(
        min_depth=min_depth,
        k_count=k_count,
        percent_coverage=percent_coverage,
        median_depth=median_depth,
        klen=klen,
    )

    variant_covg = self.variant_covgs[var_name]
    if probe_type == "ref":
        variant_covg.reference_coverages.append(probe_coverage)
        variant_covg.best_reference_coverage = (
            variant_covg._choose_best_reference_coverage()
        )
    elif probe_type == "alt":
        variant_covg.alternate_coverages.append(probe_coverage)
        variant_covg.best_alternate_coverage = (
            variant_covg._choose_best_alternate_coverage()
        )
    else:
        raise ValueError("probe_type must be ref or alt")
def _update_lineage_calls_dict(self, call, probe_name=None, var_name=None):
    """Store ``call`` under the lineage that a variant belongs to.

    The variant can be given in two ways:

    * ``probe_name``: a full probe name such as
      ``ref-K43R?var_name=AAG781686AGA&num_alts=1&ref=NC_000962.3&enum=0&gene=rpsL&mut=K43R``,
      from which the ``var_name`` parameter is extracted. A probe with no
      ``var_name``, or one whose variant is not in
      ``self.lineage_variants``, is silently ignored.
    * ``var_name``: the variant name itself, looked up directly in
      ``self.lineage_variants`` (an unknown name raises ``KeyError``).

    ``probe_name`` takes precedence when both are given. Nothing happens
    when ``self.lineage_variants`` is ``None``.

    Raises:
        Exception: if neither ``probe_name`` nor ``var_name`` is provided.
    """
    if self.lineage_variants is None:
        return
    if probe_name is None and var_name is None:
        raise Exception("Must provide probe_name or var_name")

    if probe_name is not None:
        probe_params = get_params(probe_name)
        try:
            var_name = probe_params["var_name"]
            lineage_info = self.lineage_variants[var_name]
        except KeyError:
            return
    else:
        # Only the var_name part was supplied, so there is nothing to parse.
        lineage_info = self.lineage_variants[var_name]

    calls_for_lineage = self.lineage_calls_dict.setdefault(
        lineage_info["name"], {}
    )
    calls_for_lineage[var_name] = call