예제 #1
0
    def _type_variants(self):
        """Genotype every variant probe and record the calls in ``self.out_json``.

        A ``VariantTyper`` is built from this object's typing settings and
        called once per entry of ``self.variant_covgs``. Three entries are
        then written under ``self.out_json[self.sample]``:

        * ``"genotypes"``: the sum of each call's genotype, one value per
          probe, in iteration order.
        * ``"filtered"``: ``1`` where the call's ``info["filter"]`` equals
          ``[]`` and ``0`` otherwise, in the same order.
        * ``"variant_calls"``: ``self.variant_calls_dict``.

        A call is stored (in ``self.variant_calls`` keyed by probe name and
        in ``self.variant_calls_dict`` keyed by probe id) when its genotype
        sums to more than zero, when its genotype is empty, or when
        ``self.report_all_calls`` is truthy.
        """
        # Start from an empty mapping so the key exists even if typing fails
        # part-way through the loop below.
        self.out_json[self.sample]["variant_calls"] = {}
        gt = VariantTyper(
            expected_depths=self.expected_depths,
            error_rate=self.expected_error_rate,
            contamination_depths=self.contamination_depths,
            ignore_filtered=self.ignore_filtered,
            minor_freq=self.minor_freq,
            confidence_threshold=self.variant_confidence_threshold,
            filters=self.filters,
            model=self.model,
            kmer_size=self.kmer_size,
            min_proportion_expected_depth=self.min_proportion_expected_depth,
            ploidy=self.ploidy,
        )
        genotypes = []
        filters = []
        for probe_id, probe_coverages in self.variant_covgs.items():
            probe_name = self._name_to_id(probe_coverages.var_name)
            call = gt.type(probe_coverages, variant=probe_name)

            # Computed once: it feeds both the summary list and the
            # keep/discard decision.
            genotype_sum = sum(call["genotype"])
            genotypes.append(genotype_sum)
            filters.append(int(call["info"]["filter"] == []))

            if (genotype_sum > 0 or not call["genotype"]
                    or self.report_all_calls):
                self.variant_calls[probe_name] = call
                self.variant_calls_dict[probe_id] = call
        self.out_json[self.sample]["genotypes"] = genotypes
        self.out_json[self.sample]["filtered"] = filters
        self.out_json[self.sample]["variant_calls"] = self.variant_calls_dict
예제 #2
0
파일: amr.py 프로젝트: leoisl/mykrobe
    def _simulate_snps(self):
        """Simulate SNP calls and collect (confidence, log10 coverage) pairs.

        For each of ``self.iterations`` draws, a correct-allele depth is
        sampled from a Poisson distribution with mean ``self.mean_depth`` and
        an incorrect-allele depth from a binomial distribution with
        ``self.mean_depth`` trials and success probability
        ``self.error_rate``. Draws whose two depths sum to zero are skipped.
        Every remaining draw is genotyped with a ``VariantTyper`` and the
        tuple ``(conf, cov)`` is appended to ``self.log_conf_and_covg``,
        where ``conf`` is ``call["info"]["conf"]`` and ``cov`` is the log10
        of the summed depths. The list is finally sorted in descending order.
        """
        correct_covg = np.random.poisson(lam=self.mean_depth,
                                         size=self.iterations)
        incorrect_covg = np.random.binomial(self.mean_depth,
                                            self.error_rate,
                                            size=self.iterations)
        vtyper = VariantTyper([self.mean_depth],
                              error_rate=self.error_rate,
                              kmer_size=self.kmer_length)

        # Passed to every ProbeCoverage below; constant across iterations.
        min_depth = 1
        # The correct allele's percent coverage is fixed rather than simulated.
        correct_percent_coverage = 100

        for correct_depth, incorrect_depth in zip(correct_covg,
                                                  incorrect_covg):
            total_depth = correct_depth + incorrect_depth
            if total_depth == 0:
                # log10(0) is undefined, so such draws carry no information.
                continue

            # NOTE(review): the original author flagged that the meaning of
            # allele_length in
            # KmerCountGenotypeModel.depth_to_expected_kmer_count() should be
            # checked; the two k-mer counts below (including the 0.01 offset)
            # may need to change accordingly.
            correct_k_count = (self.kmer_length * correct_depth) + 0.01
            incorrect_k_count = (self.kmer_length * incorrect_depth) + 0.01

            incorrect_percent_coverage = ConfThresholder._get_incorrect_kmer_percent_cov(
                int(incorrect_k_count), self.incorrect_kmer_to_pc_cov)
            correct_probe_coverage = ProbeCoverage(
                correct_percent_coverage,
                self.mean_depth,
                min_depth,
                correct_k_count,
                self.kmer_length,
            )
            incorrect_probe_coverage = ProbeCoverage(
                incorrect_percent_coverage,
                self.mean_depth,
                min_depth,
                incorrect_k_count,
                self.kmer_length,
            )
            vpc = VariantProbeCoverage([correct_probe_coverage],
                                       [incorrect_probe_coverage])
            call = vtyper.type(vpc)

            cov = np.log10(total_depth)
            conf = call["info"]["conf"]
            self.log_conf_and_covg.append((conf, cov))

        self.log_conf_and_covg.sort(reverse=True)
예제 #3
0
    def _type_variants(self):
        """Type all variant probes, then attach a lineage prediction.

        Each entry of ``self.variant_covgs`` is genotyped with a
        ``VariantTyper`` configured from this object's settings. Per probe,
        the genotype sum and a 0/1 flag (``1`` when the call's
        ``info["filter"]`` equals ``[]``) are collected, and every call is
        forwarded to ``self._update_lineage_calls_dict``. Calls are kept in
        ``self.variant_calls`` / ``self.variant_calls_dict`` when the
        genotype sum is positive, the genotype is empty, or
        ``self.report_all_calls`` is truthy.

        On completion ``self.out_json[self.sample]`` receives the keys
        ``"genotypes"``, ``"filtered"``, ``"variant_calls"`` and
        ``"lineage"`` (the latter holding ``"all_calls"`` and ``"result"``
        from ``self.predict_lineage()``).
        """
        self.out_json[self.sample]["variant_calls"] = {}

        typer_settings = {
            "expected_depths": self.expected_depths,
            "error_rate": self.expected_error_rate,
            "contamination_depths": self.contamination_depths,
            "ignore_filtered": self.ignore_filtered,
            "minor_freq": self.minor_freq,
            "confidence_threshold": self.variant_confidence_threshold,
            "filters": self.filters,
            "model": self.model,
            "kmer_size": self.kmer_size,
            "min_proportion_expected_depth": self.min_proportion_expected_depth,
            "ploidy": self.ploidy,
        }
        typer = VariantTyper(**typer_settings)

        genotype_sums = []
        unfiltered_flags = []

        for covg_key, covg in self.variant_covgs.items():
            # Example of covg.var_name:
            # ref-K43R?var_name=AAG781686AGA&num_alts=1&ref=NC_000962.3&enum=0&gene=rpsL&mut=K43R
            full_name = covg.var_name
            short_name = self._name_to_id(full_name)
            result = typer.type(covg, variant=short_name)

            allele_total = sum(result["genotype"])
            genotype_sums.append(allele_total)
            unfiltered_flags.append(int(result["info"]["filter"] == []))

            keep_call = (allele_total > 0 or not result["genotype"]
                         or self.report_all_calls)
            if keep_call:
                self.variant_calls[short_name] = result
                self.variant_calls_dict[covg_key] = result

            # Lineage bookkeeping is keyed by the full probe name, not the
            # shortened id.
            self._update_lineage_calls_dict(
                result, probe_name=covg.var_name)

        self.out_json[self.sample]["genotypes"] = genotype_sums
        self.out_json[self.sample]["filtered"] = unfiltered_flags
        self.out_json[self.sample]["variant_calls"] = self.variant_calls_dict

        lineage_result, lineage_calls = self.predict_lineage()
        self.out_json[self.sample]["lineage"] = {
            "all_calls": lineage_calls,
            "result": lineage_result,
        }