def peptide_fdr(psms, peptide_fdr_threshold, pi0_lambda, plot_path, nofdr):
    """Return the peptides passing a global peptide-level q-value cutoff.

    ``psms`` is grouped by ``modified_peptide`` and ``decoy`` and the maximum
    ``pp`` of each group is kept. Unless ``nofdr`` is set, target q-values are
    computed from the decoy ``pp`` distribution via ``pemp``, ``pi0est`` and
    ``qvalue``. With ``nofdr`` the ``q_value`` column already present in
    ``psms`` is used as an additional grouping key and no estimation is done.

    Returns a 2-tuple: the ``modified_peptide`` column of the target rows with
    ``q_value < peptide_fdr_threshold``, and the minimum ``pp`` among those
    rows.
    """
    # Fixed settings passed on to pi0est() / qvalue().
    pi0_method = 'bootstrap'
    pi0_smooth_df = 3
    pi0_smooth_log_pi0 = False
    pfdr = False

    # In nofdr mode the existing q_value is kept by grouping on it as well.
    group_keys = ['modified_peptide', 'decoy']
    if nofdr:
        group_keys.append('q_value')

    best = psms.groupby(group_keys)['pp'].max().reset_index()
    decoy_mask = best['decoy']
    targets = best[~decoy_mask].copy()
    decoys = best[decoy_mask].copy()

    if not nofdr:
        targets['p_value'] = pemp(targets['pp'], decoys['pp'])
        pi0 = pi0est(targets['p_value'], pi0_lambda, pi0_method,
                     pi0_smooth_df, pi0_smooth_log_pi0)['pi0']
        targets['q_value'] = qvalue(targets['p_value'], pi0, pfdr)

    # NOTE(review): assumed to run for both branches; the collapsed source
    # does not show whether plot() belonged to the else branch - confirm.
    plot(plot_path, "global peptide scores", targets['pp'], decoys['pp'])

    accepted = targets[targets['q_value'] < peptide_fdr_threshold]
    return accepted['modified_peptide'], np.min(accepted['pp'])
def test_qvalue(tmpdir, regtest):
    """Compare qvalue() output with the reference q-values in the CSV fixture.

    The fixture file is copied into ``tmpdir`` (which becomes the working
    directory), sorted by its ``p`` column, and ``qvalue`` is checked against
    the ``q_default`` and ``q_pfdr`` columns to four decimals.
    """
    work_dir = tmpdir.strpath
    os.chdir(work_dir)
    shutil.copy(os.path.join(DATA_FOLDER, "test_qvalue_ref_data.csv"), work_dir)

    reference = pd.read_csv('test_qvalue_ref_data.csv', delimiter=',')
    stat = reference.sort_values("p")

    # For comparison with R/bioconductor reference implementation
    pi0 = 0.669926026474838
    for use_pfdr, expected_column in ((False, 'q_default'), (True, 'q_pfdr')):
        np.testing.assert_almost_equal(
            qvalue(stat['p'], pi0, pfdr=use_pfdr),
            stat[expected_column].values,
            decimal=4)
def protein_fdr(psms, protein_fdr_threshold, pi0_lambda, plot_path):
    """Return the proteins passing a global protein-level q-value cutoff.

    ``psms`` is grouped by ``protein_id`` and ``decoy`` and the maximum ``pp``
    of each group is kept. Target q-values are computed from the decoy ``pp``
    distribution via ``pemp``, ``pi0est`` and ``qvalue``, and the target and
    decoy scores are passed to ``plot``.

    Returns a 2-tuple: the ``protein_id`` column of the target rows with
    ``q_value < protein_fdr_threshold``, and the minimum ``pp`` among those
    rows.
    """
    # Fixed settings passed on to pi0est() / qvalue().
    pi0_method = 'bootstrap'
    pi0_smooth_df = 3
    pi0_smooth_log_pi0 = False
    pfdr = False

    best = psms.groupby(['protein_id', 'decoy'])['pp'].max().reset_index()
    decoy_mask = best['decoy']
    targets = best[~decoy_mask].copy()
    decoys = best[decoy_mask].copy()

    targets['p_value'] = pemp(targets['pp'], decoys['pp'])
    pi0 = pi0est(targets['p_value'], pi0_lambda, pi0_method,
                 pi0_smooth_df, pi0_smooth_log_pi0)['pi0']
    targets['q_value'] = qvalue(targets['p_value'], pi0, pfdr)

    plot(plot_path, "global protein scores", targets['pp'], decoys['pp'])

    accepted = targets[targets['q_value'] < protein_fdr_threshold]
    return accepted['protein_id'], np.min(accepted['pp'])
def combine_scores(self, scores):
    """Average scores per interaction group and attach p-/q-values.

    ``scores`` is grouped by ``condition_id``, ``bait_id``, ``prey_id``,
    ``decoy`` and ``confidence_bin`` and the mean ``score`` of each group is
    taken. For rows with ``decoy == 0`` a ``pvalue`` is computed with ``pemp``
    against the ``decoy == 1`` scores, and a ``qvalue`` with ``qvalue`` using
    the pi0 estimated by ``pi0est``. Counts of unique (``bait_id``,
    ``prey_id``) pairs below several q-value cutoffs are echoed for both the
    input and the combined table, followed by the combined pi0.

    Returns the combined data frame.
    """
    group_cols = ['condition_id', 'bait_id', 'prey_id', 'decoy',
                  'confidence_bin']
    combined_scores = scores.groupby(group_cols)['score'].mean().reset_index()

    # Row masks; the 'decoy' column is not modified below, so they stay valid.
    target_rows = combined_scores['decoy'] == 0
    decoy_rows = combined_scores['decoy'] == 1

    combined_scores.loc[target_rows, 'pvalue'] = pemp(
        combined_scores[target_rows]['score'],
        combined_scores[decoy_rows]['score'])

    pi0_combined = pi0est(
        combined_scores[target_rows]['pvalue'], self.pi0_lambda,
        self.pi0_method, self.pi0_smooth_df, self.pi0_smooth_log_pi0)['pi0']

    combined_scores.loc[target_rows, 'qvalue'] = qvalue(
        combined_scores[target_rows]['pvalue'], pi0_combined, self.pfdr)

    def unique_pairs_below(frame, cutoff):
        # Number of distinct (bait_id, prey_id) pairs among non-decoy rows
        # whose q-value is strictly below the cutoff.
        hits = frame[(frame['decoy'] == 0) & (frame['qvalue'] < cutoff)]
        return hits[['bait_id', 'prey_id']].drop_duplicates().shape[0]

    cutoff_messages = (
        (0.01, "%s (at q-value < 0.01)"),
        (0.05, "%s (at q-value < 0.05)"),
        (0.1, "%s (at q-value < 0.1)"),
        (0.2, "%s (at q-value < 0.2)"),
        (0.5, "%s (at q-value < 0.5)"),
    )
    sections = (
        ("Info: Unique interactions detected before integration:", scores),
        ("Info: Unique interactions detected after integration:",
         combined_scores),
    )
    for heading, frame in sections:
        click.echo(heading)
        for cutoff, message in cutoff_messages:
            click.echo(message % unique_pairs_below(frame, cutoff))

    click.echo("Info: Combined pi0: %s." % pi0_combined)

    return combined_scores