def _type_variants(self):
    """Genotype every variant probe and record the calls in ``self.out_json``.

    One ``VariantTyper`` is configured from this object's settings and
    applied to each entry of ``self.variant_covgs``.  For the current
    sample, ``self.out_json`` receives:

    * ``"genotypes"``: the sum of the genotype entries of each call,
    * ``"filtered"``: 1 for a call whose filter list is empty, else 0,
    * ``"variant_calls"``: ``self.variant_calls_dict``.

    A call is stored in ``self.variant_calls`` (keyed by probe name) and in
    ``self.variant_calls_dict`` (keyed by probe id) only when its genotype
    sum is positive, its genotype is empty/falsy, or
    ``self.report_all_calls`` is set.
    """
    self.out_json[self.sample]["variant_calls"] = {}
    gt = VariantTyper(
        expected_depths=self.expected_depths,
        error_rate=self.expected_error_rate,
        contamination_depths=self.contamination_depths,
        ignore_filtered=self.ignore_filtered,
        minor_freq=self.minor_freq,
        confidence_threshold=self.variant_confidence_threshold,
        filters=self.filters,
        model=self.model,
        kmer_size=self.kmer_size,
        min_proportion_expected_depth=self.min_proportion_expected_depth,
        ploidy=self.ploidy,
    )
    genotypes = []
    filters = []
    for probe_id, probe_coverages in self.variant_covgs.items():
        probe_name = self._name_to_id(probe_coverages.var_name)
        call = gt.type(probe_coverages, variant=probe_name)

        # Computed once; used for both the summary list and the report test.
        genotype_sum = sum(call["genotype"])
        genotypes.append(genotype_sum)
        # NOTE: despite the "filtered" key name, 1 means *no* filter applied.
        filters.append(int(call["info"]["filter"] == []))

        if genotype_sum > 0 or not call["genotype"] or self.report_all_calls:
            self.variant_calls[probe_name] = call
            self.variant_calls_dict[probe_id] = call

    self.out_json[self.sample]["genotypes"] = genotypes
    self.out_json[self.sample]["filtered"] = filters
    self.out_json[self.sample]["variant_calls"] = self.variant_calls_dict
def _simulate_snps(self):
    """Simulate SNP calls and collect ``(confidence, log10 depth)`` pairs.

    ``self.iterations`` correct-allele depths are drawn from
    ``Poisson(self.mean_depth)`` and the matching incorrect-allele depths
    from ``Binomial(self.mean_depth, self.error_rate)``.  Each pair is
    turned into a ``VariantProbeCoverage`` and typed with a
    ``VariantTyper``; the call's ``info["conf"]`` and the log10 of the
    total depth are appended to ``self.log_conf_and_covg``, which is then
    sorted in descending order.

    Pairs whose total depth is zero are skipped (log10 of zero is not
    finite).
    """
    correct_covg = np.random.poisson(lam=self.mean_depth, size=self.iterations)
    incorrect_covg = np.random.binomial(
        self.mean_depth, self.error_rate, size=self.iterations
    )
    vtyper = VariantTyper(
        [self.mean_depth], error_rate=self.error_rate, kmer_size=self.kmer_length
    )

    # Loop invariants.
    min_depth = 1
    # The correct allele is given a fixed 100% coverage rather than a
    # simulated value.
    correct_percent_coverage = 100

    for correct_depth, incorrect_depth in zip(correct_covg, incorrect_covg):
        total_depth = correct_depth + incorrect_depth
        if total_depth == 0:
            continue

        # TODO: check what allele_length means in
        # KmerCountGenotypeModel.depth_to_expected_kmer_count(); the two
        # k-mer counts below follow that method and may need to change.
        correct_k_count = (self.kmer_length * correct_depth) + 0.01
        incorrect_k_count = (self.kmer_length * incorrect_depth) + 0.01

        incorrect_percent_coverage = ConfThresholder._get_incorrect_kmer_percent_cov(
            int(incorrect_k_count), self.incorrect_kmer_to_pc_cov
        )

        correct_probe_coverage = ProbeCoverage(
            correct_percent_coverage,
            self.mean_depth,
            min_depth,
            correct_k_count,
            self.kmer_length,
        )
        incorrect_probe_coverage = ProbeCoverage(
            incorrect_percent_coverage,
            self.mean_depth,
            min_depth,
            incorrect_k_count,
            self.kmer_length,
        )
        vpc = VariantProbeCoverage(
            [correct_probe_coverage], [incorrect_probe_coverage]
        )
        call = vtyper.type(vpc)

        cov = np.log10(total_depth)
        conf = call["info"]["conf"]
        self.log_conf_and_covg.append((conf, cov))

    self.log_conf_and_covg.sort(reverse=True)
def _type_variants(self):
    """Type all variant probes, then predict the sample's lineage.

    Every entry of ``self.variant_covgs`` is genotyped with a
    ``VariantTyper`` built from this object's settings.  Per-probe genotype
    sums and filter flags (1 when the call's filter list is empty, else 0)
    are written to ``self.out_json`` for the current sample, together with
    ``self.variant_calls_dict``.  Each call is also passed to
    ``self._update_lineage_calls_dict``; finally the output of
    ``self.predict_lineage()`` is stored under the ``"lineage"`` key.
    """
    self.out_json[self.sample]["variant_calls"] = {}

    typer_settings = {
        "expected_depths": self.expected_depths,
        "error_rate": self.expected_error_rate,
        "contamination_depths": self.contamination_depths,
        "ignore_filtered": self.ignore_filtered,
        "minor_freq": self.minor_freq,
        "confidence_threshold": self.variant_confidence_threshold,
        "filters": self.filters,
        "model": self.model,
        "kmer_size": self.kmer_size,
        "min_proportion_expected_depth": self.min_proportion_expected_depth,
        "ploidy": self.ploidy,
    }
    typer = VariantTyper(**typer_settings)

    genotype_sums = []
    unfiltered_flags = []
    for probe_id, coverage in self.variant_covgs.items():
        # Example of coverage.var_name:
        # ref-K43R?var_name=AAG781686AGA&num_alts=1&ref=NC_000962.3&enum=0&gene=rpsL&mut=K43R
        raw_name = coverage.var_name
        probe_name = self._name_to_id(raw_name)
        call = typer.type(coverage, variant=probe_name)

        allele_sum = sum(call["genotype"])
        genotype_sums.append(allele_sum)
        unfiltered_flags.append(int(call["info"]["filter"] == []))

        should_report = (
            allele_sum > 0 or not call["genotype"] or self.report_all_calls
        )
        if should_report:
            self.variant_calls[probe_name] = call
            self.variant_calls_dict[probe_id] = call

        # NOTE(review): the original indentation was lost; the lineage
        # update is taken to run for every probe, not only reported
        # ones -- confirm against the upstream source.
        self._update_lineage_calls_dict(call, probe_name=coverage.var_name)

    sample_out = self.out_json[self.sample]
    sample_out["genotypes"] = genotype_sums
    sample_out["filtered"] = unfiltered_flags
    sample_out["variant_calls"] = self.variant_calls_dict

    lineage_result, lineage_calls = self.predict_lineage()
    self.out_json[self.sample]["lineage"] = {
        "all_calls": lineage_calls,
        "result": lineage_result,
    }