Example #1
    def apply_classifier(self, final_classifier, experiment, all_test_target_scores,
                         all_test_decoy_scores, table, p_score=False):

        lambda_ = CONFIG.get("final_statistics.lambda")

        mu, nu, final_score = self.calculate_params_for_d_score(final_classifier, experiment)
        experiment["d_score"] = (final_score - mu) / nu

        if CONFIG.get("final_statistics.fdr_all_pg"):
            all_tt_scores = experiment.get_target_peaks()["d_score"]
        else:
            all_tt_scores = experiment.get_top_target_peaks()["d_score"]

        df_raw_stat, num_null, num_total = calculate_final_statistics(
            all_tt_scores, all_test_target_scores, all_test_decoy_scores, lambda_)
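        # df_raw_stat is the decoy-derived error table (cutoff, q-value and s-value per
        # score); num_null and num_total presumably come from the lambda_-based pi0
        # estimate over the cross-validated target scores.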

        scored_table = self.enrich_table_with_results(table, experiment, df_raw_stat)

        if CONFIG.get("compute.probabilities"):
            logging.info( "" )
            logging.info( "Posterior Probability estimation:" )
            logging.info( "Estimated number of null %0.2f out of a total of %s. " % (num_null, num_total) )

            # Note that num_null and num_total are the sum of the
            # cross-validated statistics computed before, therefore the total
            # number of data points selected will be 
            #   len(data) /  xeval.fraction * xeval.num_iter
            # 
            prior_chrom_null = num_null * 1.0 / num_total
            number_true_chromatograms = (1.0-prior_chrom_null) * len(experiment.get_top_target_peaks().df)
            number_target_pg = len(Experiment(experiment.df[experiment.df.is_decoy == False]).df)
            prior_peakgroup_true = number_true_chromatograms / number_target_pg

            logging.info( "Prior for a peakgroup: %s" % (number_true_chromatograms / number_target_pg))
            logging.info( "Prior for a chromatogram: %s" % str(1-prior_chrom_null) )
            logging.info( "Estimated number of true chromatograms: %s out of %s" % (number_true_chromatograms, len(experiment.get_top_target_peaks().df)) )
            logging.info( "Number of target data: %s" % len( Experiment(experiment.df[(experiment.df.is_decoy == False) ]).df ) )

            # pg_score = posterior probability for each peakgroup
            # h_score = posterior probability for the hypothesis that this peakgroup is true (and all other false)
            # h0_score = posterior probability for the hypothesis that no peakgroup is true

            pp_pg_pvalues = posterior_pg_prob(experiment, prior_peakgroup_true, lambda_=lambda_)
            experiment.df[ "pg_score"]  = pp_pg_pvalues
            scored_table = scored_table.join(experiment[["pg_score"]])

            allhypothesis, h0 = posterior_chromatogram_hypotheses_fast(experiment, prior_chrom_null)
            experiment.df[ "h_score"]  = allhypothesis
            experiment.df[ "h0_score"]  = h0
            scored_table = scored_table.join(experiment[["h_score", "h0_score"]])

        final_statistics = final_err_table(df_raw_stat)
        summary_statistics = summary_err_table(df_raw_stat)

        needed_to_persist = (final_classifier, mu, nu,
                             df_raw_stat.loc[:, ["svalue", "qvalue", "cutoff"]], num_null, num_total)
        return (summary_statistics, final_statistics, scored_table), needed_to_persist
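A minimal sketch of how a caller might consume the tuple returned by apply_classifier above, assuming scored_table is a pandas DataFrame; the runner object, the unpacking names and the output file name are illustrative assumptions, not part of the original code.

# Hypothetical caller of Example #1's apply_classifier.
(summary_statistics, final_statistics, scored_table), needed_to_persist = \
    runner.apply_classifier(final_classifier, experiment,
                            all_test_target_scores, all_test_decoy_scores, table)

# needed_to_persist bundles what is required to re-apply the same scoring later:
# the trained classifier, the d_score normalization (mu, nu), the reduced error
# table and the estimated null/total counts.
classifier, mu, nu, raw_stat_subset, num_null, num_total = needed_to_persist
scored_table.to_csv("scored_table.tsv", sep="\t", index=False)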
Example #2
    def add_probabilities(self, scored_table, texp):

        lambda_ = CONFIG.get("final_statistics.lambda")
        pp_pg_pvalues = posterior_pg_prob(self.dvals, self.target_scores, self.decoy_scores,
                                          self.error_stat, self.number_target_peaks,
                                          self.number_target_pg,
                                          texp.df["d_score"],
                                          lambda_)
        texp.df["pg_score"] = pp_pg_pvalues
        scored_table = scored_table.join(texp[["pg_score"]])

        prior_chrom_null = self.error_stat.num_null / self.error_stat.num_total
        allhypothesis, h0 = posterior_chromatogram_hypotheses_fast(texp, prior_chrom_null)
        texp.df["h_score"] = allhypothesis
        texp.df["h0_score"] = h0
        scored_table = scored_table.join(texp[["h_score", "h0_score"]])

        return scored_table
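The h_score / h0_score step above can be pictured with a small stand-alone sketch. This is not the posterior_chromatogram_hypotheses_fast implementation; it only illustrates, assuming independent peak groups and a uniform prior over the "exactly one peak group is true" hypotheses, how per-peakgroup probabilities and the chromatogram-level null prior could be combined.

import numpy as np

def chromatogram_hypotheses_sketch(pg_probs, prior_chrom_null):
    # pg_probs: per-peakgroup probabilities of being the true signal for one chromatogram.
    pg_probs = np.asarray(pg_probs, dtype=float)
    n = len(pg_probs)
    # Weight of H0 ("no peak group is true"), scaled by the chromatogram-level null prior.
    w0 = prior_chrom_null * np.prod(1.0 - pg_probs)
    # Weight of H_i ("peak group i is true, all others are false"), sharing the
    # remaining prior mass uniformly across the n single-peakgroup hypotheses.
    w = ((1.0 - prior_chrom_null) / n) * np.array(
        [pg_probs[i] * np.prod(np.delete(1.0 - pg_probs, i)) for i in range(n)])
    norm = w0 + w.sum()
    return w / norm, w0 / norm  # roughly analogous to h_score per peak group and h0_score

# Example: three peak groups, one clearly dominant.
h, h0 = chromatogram_hypotheses_sketch([0.9, 0.1, 0.05], prior_chrom_null=0.3)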
Example #3
    def add_probabilities(self, scored_table, texp):

        lambda_ = CONFIG.get("final_statistics.lambda")
        pp_pg_pvalues = posterior_pg_prob(self.dvals, self.target_scores,
                                          self.decoy_scores, self.error_stat,
                                          self.number_target_peaks,
                                          self.number_target_pg,
                                          texp.df["d_score"], lambda_)
        texp.df["pg_score"] = pp_pg_pvalues
        scored_table = scored_table.join(texp[["pg_score"]])

        prior_chrom_null = self.error_stat.num_null / self.error_stat.num_total
        allhypothesis, h0 = posterior_chromatogram_hypotheses_fast(
            texp, prior_chrom_null)
        texp.df["h_score"] = allhypothesis
        texp.df["h0_score"] = h0
        scored_table = scored_table.join(texp[["h_score", "h0_score"]])

        return scored_table
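For the per-peakgroup pg_score itself, the core idea is a Bayes-rule posterior built from the target and decoy score distributions. The sketch below is a generic stand-in (Gaussian KDE densities, no pi0 correction), not the posterior_pg_prob routine these examples call.

import numpy as np
from scipy.stats import gaussian_kde

def pg_posterior_sketch(target_scores, decoy_scores, prior_true, d_scores):
    # Density estimates for "target-like" and "decoy/null" scores; gaussian_kde is an
    # assumption here, and the target density is really a true/false mixture that a
    # real implementation would deconvolve (e.g. via a lambda_-based pi0 estimate).
    p_target = gaussian_kde(np.asarray(target_scores))(np.asarray(d_scores))
    p_null = gaussian_kde(np.asarray(decoy_scores))(np.asarray(d_scores))
    # Bayes' rule: P(true | score) =
    #   prior * p(score | true) / (prior * p(score | true) + (1 - prior) * p(score | null))
    num = prior_true * p_target
    return num / (num + (1.0 - prior_true) * p_null)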
Example #4
	def apply_classifier(self, final_classifier, experiment, test_exp, all_test_target_scores,
						 all_test_decoy_scores, table, p_score=False):

		lambda_ = CONFIG.get("final_statistics.lambda")

		mu, nu, final_score = self.calculate_params_for_d_score(final_classifier, experiment)
		experiment["d_score"] = (final_score - mu) / nu

		if CONFIG.get("final_statistics.fdr_all_pg"):
			all_tt_scores = experiment.get_target_peaks()["d_score"]
		else:
			all_tt_scores = experiment.get_top_target_peaks()["d_score"]

		is_test = CONFIG.get("is_test", False)

		if is_test:
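			# In test mode, build several error tables over the same scores so the
			# different null models (normal, non-parametric, log-normal) and FDR
			# calculators (mProphet-style vs. Storey) can be compared side by side;
			# "res" below points at the variant used for the main result.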
			d = {
				'pyProph':FlexibleErrorTable(
									all_tt_scores,
									all_test_target_scores,
									all_test_decoy_scores,
									lambda_,
									NormalNullModel(),
									MProphFDRCalc(),
									MProphStatCalc(),
									MProphStatSampler()
							),
				'nonParam':FlexibleErrorTable(
									all_tt_scores,
									all_test_target_scores,
									all_test_decoy_scores,
									lambda_,
									NonParamNullModel(),
									MProphFDRCalc(),
									MProphStatCalc(),
									MProphStatSampler()
							),
				'logNormal':FlexibleErrorTable(
									all_tt_scores,
									all_test_target_scores,
									all_test_decoy_scores,
									lambda_,
									LogNormalNullModel(),
									MProphFDRCalc(),
									MProphStatCalc(),
									MProphStatSampler()
							),
				'nonParam-storey':FlexibleErrorTable(
									all_tt_scores,
									all_test_target_scores,
									all_test_decoy_scores,
									lambda_,
									NonParamNullModel(),
									StoreyFDRCalc(),
									MProphStatCalc(),
									MProphStatSampler()
							),
				'nonParam-storey-jt':FlexibleErrorTable(
									all_tt_scores,
									all_test_target_scores,
									all_test_decoy_scores,
									lambda_,
									NonParamNullModel(),
									StoreyFDRCalc(),
									JTStatCalc(),
									MProphStatSampler()
							)
				}
			
			d["res"] = d["pyProph"]
		
			if test_exp is not None:
				muT, nuT, final_scoreT = self.calculate_params_for_d_score(final_classifier, test_exp)
				test_exp["d_score"] = (final_scoreT - muT) / nuT
				d['true_pyProph'] = FlexibleErrorTable(
						all_tt_scores, 
						test_exp.get_top_target_peaks()["d_score"],
						test_exp.get_top_decoy_peaks()["d_score"],
						lambda_,
						NormalNullModel(),
						MProphFDRCalc(),
						MProphStatCalc(),
						MProphStatSampler()
					)
				d['true_nonParam'] = FlexibleErrorTable(
						all_tt_scores, 
						test_exp.get_top_target_peaks()["d_score"],
						test_exp.get_top_decoy_peaks()["d_score"],
						lambda_,
						NonParamNullModel(),
						MProphFDRCalc(),
						MProphStatCalc(),
						MProphStatSampler()
					)
				d['true_logNormal'] = FlexibleErrorTable(
						all_tt_scores,
						test_exp.get_top_target_peaks()["d_score"],
						test_exp.get_top_decoy_peaks()["d_score"],
						lambda_,
						LogNormalNullModel(),
						MProphFDRCalc(),
						MProphStatCalc(),
						MProphStatSampler()
					)
		else:
			null_model = getNullModel(CONFIG.get("final_statistics.null_model"))
			fdr_calc = getFDRCalc(CONFIG.get("final_statistics.fdr_calc"))
			stat_calc = getStatCalc(CONFIG.get("final_statistics.stat_calc"))
			stat_sampler = getStatSampler(CONFIG.get("final_statistics.stat_sampler"))
			decoys_missing = CONFIG.get("decoy.missing", 0.0)
			d = dict(
				res=FlexibleErrorTable(
					all_tt_scores,
					all_test_target_scores,
					all_test_decoy_scores,
					lambda_,
					null_model,
					fdr_calc,
					stat_calc,
					stat_sampler,
					decoys_missing
				)
			)

		def getRes(et):
			return (et.summary_table(), et.final_table(), et.enrich(table, experiment))

		et = d["res"]
		sum_tab, fin_tab, score_tab = getRes(et)

		if CONFIG.get("compute.probabilities"):
			logging.info( "" )
			logging.info( "Posterior Probability estimation:" )
			logging.info( "Estimated number of null %0.2f out of a total of %s. " % (et.num_null, et.num_total) )

			# Note that num_null and num_total are the sum of the
			# cross-validated statistics computed before, therefore the total
			# number of data points selected will be 
			#	len(data) /  xeval.fraction * xeval.num_iter
			# 
			prior_chrom_null = et.num_null * 1.0 / et.num_total
			number_true_chromatograms = (1.0-prior_chrom_null) * len(experiment.get_top_target_peaks().df)
			number_target_pg = len(Experiment(experiment.df[experiment.df.is_decoy == False]).df)
			prior_peakgroup_true = number_true_chromatograms / number_target_pg

			logging.info( "Prior for a peakgroup: %s" % (number_true_chromatograms / number_target_pg))
			logging.info( "Prior for a chromatogram: %s" % str(1-prior_chrom_null) )
			logging.info( "Estimated number of true chromatograms: %s out of %s" % (
				number_true_chromatograms, len(experiment.get_top_target_peaks().df)) )
			logging.info( "Number of target data: %s" % len( Experiment(experiment.df[(experiment.df.is_decoy == False) ]).df ) )

			# pg_score = posterior probability for each peakgroup
			# h_score = posterior probability for the hypothesis that this peakgroup is true (and all other false)
			# h0_score = posterior probability for the hypothesis that no peakgroup is true

			pp_pg_pvalues = posterior_pg_prob(experiment, prior_peakgroup_true, lambda_=lambda_)
			experiment.df[ "pg_score"]  = pp_pg_pvalues
			score_tab = score_tab.join(experiment[["pg_score"]])

			allhypothesis, h0 = posterior_chromatogram_hypotheses_fast(experiment, prior_chrom_null)
			experiment.df[ "h_score"]  = allhypothesis
			experiment.df[ "h0_score"]  = h0
			score_tab = score_tab.join(experiment[["h_score", "h0_score"]])

		needed_to_persist = (final_classifier, mu, nu,
					et.df.loc[:, ["svalue", "qvalue", "cutoff"]], et.num_null, et.num_total)
		return (sum_tab, fin_tab, score_tab), d, needed_to_persist
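In test mode the second element of the return value is the dict d built above, holding one FlexibleErrorTable per null-model / FDR-calculator combination. A hypothetical comparison loop (the runner object and argument names are assumptions) could look like this:

(sum_tab, fin_tab, score_tab), d, needed_to_persist = runner.apply_classifier(
    final_classifier, experiment, test_exp,
    all_test_target_scores, all_test_decoy_scores, table)

# Compare the summary error tables produced by the different configurations;
# d["res"] is the table used for the main result.
for name, et in sorted(d.items()):
    print(name)
    print(et.summary_table())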