def apply_classifier(self, final_classifier, experiment, all_test_target_scores,
                     all_test_decoy_scores, table, p_score=False):
    """Score *experiment* with the trained classifier and build result tables.

    Normalizes the raw classifier score into a d_score, computes final error
    statistics from the cross-validated target/decoy scores, and enriches
    *table* with the results.  If ``compute.probabilities`` is enabled,
    posterior probabilities (pg_score, h_score, h0_score) are added as well.

    :param final_classifier: trained classifier passed to
        ``calculate_params_for_d_score``.
    :param experiment: Experiment-like object holding the peak-group data
        (exposes ``df``, ``get_target_peaks``, ``get_top_target_peaks``).
    :param all_test_target_scores: pooled target scores from cross-validation.
    :param all_test_decoy_scores: pooled decoy scores from cross-validation.
    :param table: raw input table to enrich with scoring results.
    :param p_score: unused here — kept for interface compatibility.
    :return: ``((summary_statistics, final_statistics, scored_table),
        needed_to_persist)`` where the second element carries everything
        required to re-apply the scorer later.
    """
    lambda_ = CONFIG.get("final_statistics.lambda")
    # Normalize classifier output to a d_score via the decoy distribution
    # parameters (mu, nu).
    mu, nu, final_score = self.calculate_params_for_d_score(final_classifier, experiment)
    experiment["d_score"] = (final_score - mu) / nu
    # fdr_all_pg: estimate error statistics over ALL target peak groups
    # instead of only the best peak group per chromatogram.
    if (CONFIG.get("final_statistics.fdr_all_pg")):
        all_tt_scores = experiment.get_target_peaks()["d_score"]
    else:
        all_tt_scores = experiment.get_top_target_peaks()["d_score"]
    df_raw_stat, num_null, num_total = calculate_final_statistics(all_tt_scores,
                                                                  all_test_target_scores,
                                                                  all_test_decoy_scores,
                                                                  lambda_)
    scored_table = self.enrich_table_with_results(table, experiment, df_raw_stat)
    if CONFIG.get("compute.probabilities"):
        logging.info("")
        logging.info("Posterior Probability estimation:")
        logging.info("Estimated number of null %0.2f out of a total of %s. " % (num_null, num_total))
        # Note that num_null and num_total are the sum of the
        # cross-validated statistics computed before, therefore the total
        # number of data points selected will be
        # len(data) / xeval.fraction * xeval.num_iter
        #
        prior_chrom_null = num_null * 1.0 / num_total
        number_true_chromatograms = (1.0 - prior_chrom_null) * len(experiment.get_top_target_peaks().df)
        number_target_pg = len(Experiment(experiment.df[(experiment.df.is_decoy == False)]).df)
        prior_peakgroup_true = number_true_chromatograms / number_target_pg
        logging.info("Prior for a peakgroup: %s" % (number_true_chromatograms / number_target_pg))
        logging.info("Prior for a chromatogram: %s" % str(1 - prior_chrom_null))
        logging.info("Estimated number of true chromatograms: %s out of %s" % (number_true_chromatograms, len(experiment.get_top_target_peaks().df)))
        logging.info("Number of target data: %s" % len(Experiment(experiment.df[(experiment.df.is_decoy == False)]).df))
        # pg_score = posterior probability for each peakgroup
        # h_score = posterior probability for the hypothesis that this peakgroup is true (and all other false)
        # h0_score = posterior probability for the hypothesis that no peakgroup is true
        pp_pg_pvalues = posterior_pg_prob(experiment, prior_peakgroup_true, lambda_=lambda_)
        experiment.df["pg_score"] = pp_pg_pvalues
        # NOTE(review): joining on `experiment[[...]]`, not `experiment.df` —
        # presumably Experiment.__getitem__ delegates to the frame; confirm.
        scored_table = scored_table.join(experiment[["pg_score"]])
        allhypothesis, h0 = posterior_chromatogram_hypotheses_fast(experiment, prior_chrom_null)
        experiment.df["h_score"] = allhypothesis
        experiment.df["h0_score"] = h0
        scored_table = scored_table.join(experiment[["h_score", "h0_score"]])
    # These are built unconditionally — the probability block above only
    # augments scored_table.
    final_statistics = final_err_table(df_raw_stat)
    summary_statistics = summary_err_table(df_raw_stat)
    needed_to_persist = (final_classifier, mu, nu,
                         df_raw_stat.loc[:, ["svalue", "qvalue", "cutoff"]],
                         num_null, num_total)
    return (summary_statistics, final_statistics, scored_table), needed_to_persist
def add_probabilities(self, scored_table, texp):
    """Append posterior-probability columns to *scored_table*.

    Adds ``pg_score`` (peak-group posterior), ``h_score`` (posterior that a
    given peak group is the single true one) and ``h0_score`` (posterior that
    no peak group is true) computed from the stored error statistics.

    :param scored_table: table the new columns are joined onto.
    :param texp: scored Experiment-like object (provides ``df["d_score"]``).
    :return: *scored_table* with the three probability columns joined in.
    """
    lam = CONFIG.get("final_statistics.lambda")
    pg_probs = posterior_pg_prob(self.dvals, self.target_scores, self.decoy_scores,
                                 self.error_stat, self.number_target_peaks,
                                 self.number_target_pg, texp.df["d_score"], lam)
    texp.df["pg_score"] = pg_probs
    scored_table = scored_table.join(texp[["pg_score"]])

    # Fraction of chromatograms estimated to contain no true peak group.
    null_fraction = self.error_stat.num_null / self.error_stat.num_total
    hyp_probs, h0_probs = posterior_chromatogram_hypotheses_fast(texp, null_fraction)
    texp.df["h_score"] = hyp_probs
    texp.df["h0_score"] = h0_probs
    return scored_table.join(texp[["h_score", "h0_score"]])
def add_probabilities(self, scored_table, texp):
    """Join posterior probabilities (pg_score, h_score, h0_score) onto
    *scored_table* and return the augmented table.

    NOTE(review): this definition is identical in behavior to the
    ``add_probabilities`` defined earlier in this file; being later, it is
    the one that takes effect — consider removing one copy.
    """
    lambda_value = CONFIG.get("final_statistics.lambda")
    # Per-peak-group posterior probability, derived from the persisted
    # cross-validation scores and error statistics.
    texp.df["pg_score"] = posterior_pg_prob(
        self.dvals, self.target_scores, self.decoy_scores, self.error_stat,
        self.number_target_peaks, self.number_target_pg,
        texp.df["d_score"], lambda_value)
    scored_table = scored_table.join(texp[["pg_score"]])

    prior_null = self.error_stat.num_null / self.error_stat.num_total
    h_all, h_none = posterior_chromatogram_hypotheses_fast(texp, prior_null)
    texp.df["h_score"] = h_all
    texp.df["h0_score"] = h_none
    scored_table = scored_table.join(texp[["h_score", "h0_score"]])
    return scored_table
def apply_classifier(self, final_classifier, experiment, test_exp, all_test_target_scores,
                     all_test_decoy_scores, table, p_score=False):
    """Score *experiment*, build error tables and result tables.

    Normalizes classifier output into a d_score, builds one or more
    ``FlexibleErrorTable``s (several variants when ``is_test`` is set, a
    single configured one otherwise), and returns summary/final/scored
    tables plus the dict of error tables and the persistence tuple.

    :param final_classifier: trained classifier passed to
        ``calculate_params_for_d_score``.
    :param experiment: Experiment-like object holding the peak-group data.
    :param test_exp: optional held-out experiment; when given under
        ``is_test``, additional "true_*" error tables are computed from it.
    :param all_test_target_scores: pooled target scores from cross-validation.
    :param all_test_decoy_scores: pooled decoy scores from cross-validation.
    :param table: raw input table to enrich with scoring results.
    :param p_score: unused here — kept for interface compatibility.
    :return: ``((sum_tab, fin_tab, score_tab), d, needed_to_persist)``.
    """
    lambda_ = CONFIG.get("final_statistics.lambda")
    # Normalize classifier output to a d_score via (mu, nu).
    mu, nu, final_score = self.calculate_params_for_d_score(final_classifier, experiment)
    experiment["d_score"] = (final_score - mu) / nu
    # BUGFIX: both branches previously called get_top_target_peaks(), making
    # the fdr_all_pg flag a no-op.  When the flag is set, use ALL target peak
    # groups — consistent with the other apply_classifier in this file.
    if CONFIG.get("final_statistics.fdr_all_pg"):
        all_tt_scores = experiment.get_target_peaks()["d_score"]
    else:
        all_tt_scores = experiment.get_top_target_peaks()["d_score"]

    is_test = CONFIG.get("is_test", False)
    if is_test:
        # Test mode: build one error table per null-model / FDR / stat
        # combination so the variants can be compared side by side.
        d = {
            'pyProph': FlexibleErrorTable(
                all_tt_scores, all_test_target_scores, all_test_decoy_scores, lambda_,
                NormalNullModel(), MProphFDRCalc(), MProphStatCalc(), MProphStatSampler()),
            'nonParam': FlexibleErrorTable(
                all_tt_scores, all_test_target_scores, all_test_decoy_scores, lambda_,
                NonParamNullModel(), MProphFDRCalc(), MProphStatCalc(), MProphStatSampler()),
            'logNormal': FlexibleErrorTable(
                all_tt_scores, all_test_target_scores, all_test_decoy_scores, lambda_,
                LogNormalNullModel(), MProphFDRCalc(), MProphStatCalc(), MProphStatSampler()),
            'nonParam-storey': FlexibleErrorTable(
                all_tt_scores, all_test_target_scores, all_test_decoy_scores, lambda_,
                NonParamNullModel(), StoreyFDRCalc(), MProphStatCalc(), MProphStatSampler()),
            'nonParam-storey-jt': FlexibleErrorTable(
                all_tt_scores, all_test_target_scores, all_test_decoy_scores, lambda_,
                NonParamNullModel(), StoreyFDRCalc(), JTStatCalc(), MProphStatSampler()),
        }
        # The canonical result in test mode is the classic pyProphet table.
        d["res"] = d["pyProph"]
        if test_exp is not None:
            # Score the held-out experiment and derive "true" error tables
            # from its own target/decoy d_scores.
            muT, nuT, final_scoreT = self.calculate_params_for_d_score(final_classifier, test_exp)
            test_exp["d_score"] = (final_scoreT - muT) / nuT
            d['true_pyProph'] = FlexibleErrorTable(
                all_tt_scores,
                test_exp.get_top_target_peaks()["d_score"],
                test_exp.get_top_decoy_peaks()["d_score"],
                lambda_,
                NormalNullModel(), MProphFDRCalc(), MProphStatCalc(), MProphStatSampler())
            d['true_nonParam'] = FlexibleErrorTable(
                all_tt_scores,
                test_exp.get_top_target_peaks()["d_score"],
                test_exp.get_top_decoy_peaks()["d_score"],
                lambda_,
                NonParamNullModel(), MProphFDRCalc(), MProphStatCalc(), MProphStatSampler())
            d['true_logNormal'] = FlexibleErrorTable(
                all_tt_scores,
                test_exp.get_top_target_peaks()["d_score"],
                test_exp.get_top_decoy_peaks()["d_score"],
                lambda_,
                LogNormalNullModel(), MProphFDRCalc(), MProphStatCalc(), MProphStatSampler())
    else:
        # Production mode: one error table, fully driven by configuration.
        null_model = getNullModel(CONFIG.get("final_statistics.null_model"))
        fdr_calc = getFDRCalc(CONFIG.get("final_statistics.fdr_calc"))
        stat_calc = getStatCalc(CONFIG.get("final_statistics.stat_calc"))
        stat_sampler = getStatSampler(CONFIG.get("final_statistics.stat_sampler"))
        decoys_missing = CONFIG.get("decoy.missing", 0.0)
        d = dict(res=FlexibleErrorTable(
            all_tt_scores, all_test_target_scores, all_test_decoy_scores, lambda_,
            null_model, fdr_calc, stat_calc, stat_sampler, decoys_missing))

    def getRes(et):
        # Materialize the three result tables from one error table.
        return (et.summary_table(), et.final_table(), et.enrich(table, experiment))

    et = d["res"]
    sum_tab, fin_tab, score_tab = getRes(et)

    if CONFIG.get("compute.probabilities"):
        logging.info("")
        logging.info("Posterior Probability estimation:")
        logging.info("Estimated number of null %0.2f out of a total of %s. " % (et.num_null, et.num_total))
        # Note that num_null and num_total are the sum of the
        # cross-validated statistics computed before, therefore the total
        # number of data points selected will be
        # len(data) / xeval.fraction * xeval.num_iter
        #
        prior_chrom_null = et.num_null * 1.0 / et.num_total
        number_true_chromatograms = (1.0 - prior_chrom_null) * len(experiment.get_top_target_peaks().df)
        number_target_pg = len(Experiment(experiment.df[(experiment.df.is_decoy == False)]).df)
        prior_peakgroup_true = number_true_chromatograms / number_target_pg
        logging.info("Prior for a peakgroup: %s" % (number_true_chromatograms / number_target_pg))
        logging.info("Prior for a chromatogram: %s" % str(1 - prior_chrom_null))
        logging.info("Estimated number of true chromatograms: %s out of %s" % (
            number_true_chromatograms, len(experiment.get_top_target_peaks().df)))
        logging.info("Number of target data: %s" % len(Experiment(experiment.df[(experiment.df.is_decoy == False)]).df))
        # pg_score = posterior probability for each peakgroup
        # h_score = posterior probability for the hypothesis that this peakgroup is true (and all other false)
        # h0_score = posterior probability for the hypothesis that no peakgroup is true
        pp_pg_pvalues = posterior_pg_prob(experiment, prior_peakgroup_true, lambda_=lambda_)
        experiment.df["pg_score"] = pp_pg_pvalues
        score_tab = score_tab.join(experiment[["pg_score"]])
        allhypothesis, h0 = posterior_chromatogram_hypotheses_fast(experiment, prior_chrom_null)
        experiment.df["h_score"] = allhypothesis
        experiment.df["h0_score"] = h0
        score_tab = score_tab.join(experiment[["h_score", "h0_score"]])

    needed_to_persist = (final_classifier, mu, nu,
                         et.df.loc[:, ["svalue", "qvalue", "cutoff"]],
                         et.num_null, et.num_total)
    return (sum_tab, fin_tab, score_tab), d, needed_to_persist