def peptide_fdr(psms, peptide_fdr_threshold, pi0_lambda, plot_path, nofdr):
    """Select target peptides whose q-value is below a threshold.

    PSMs are collapsed to the maximum ``pp`` per ``modified_peptide`` and
    ``decoy`` flag and then split into targets and decoys.

    * ``nofdr`` truthy: ``q_value`` is part of the grouping key, so the
      q-values already present in ``psms`` are carried through and no
      p-values or q-values are computed here.
    * ``nofdr`` falsy: target p-values come from ``pemp`` (target ``pp``
      against decoy ``pp``) and q-values from ``qvalue`` using the ``pi0``
      entry returned by ``pi0est``.

    Args:
        psms: DataFrame with ``modified_peptide``, ``decoy`` and ``pp``
            columns (and ``q_value`` when ``nofdr`` is truthy). ``decoy`` is
            negated with ``~`` to select targets, so it is presumably
            boolean -- verify against the caller.
        peptide_fdr_threshold: Exclusive upper bound on ``q_value``.
        pi0_lambda: Forwarded unchanged to ``pi0est``.
        plot_path: Forwarded unchanged to ``plot``.
        nofdr: Selects between the two modes described above.

    Returns:
        A 2-tuple: the ``modified_peptide`` column of the targets below the
        threshold, and ``np.min`` of their ``pp`` values.
    """
    group_keys = ['modified_peptide', 'decoy']
    if nofdr:
        # Grouping on q_value as well keeps the precomputed column in the
        # collapsed frame.
        group_keys = group_keys + ['q_value']

    best_per_peptide = psms.groupby(group_keys)['pp'].max().reset_index()
    decoy_flag = best_per_peptide['decoy']
    targets = best_per_peptide[~decoy_flag].copy()
    decoys = best_per_peptide[decoy_flag].copy()

    if not nofdr:
        # Fixed pi0 estimation / q-value settings for the peptide level.
        estimator = 'bootstrap'
        smoothing_df = 3
        smooth_on_log_scale = False
        use_pfdr = False

        targets['p_value'] = pemp(targets['pp'], decoys['pp'])
        pi0 = pi0est(targets['p_value'], pi0_lambda, estimator,
                     smoothing_df, smooth_on_log_scale)['pi0']
        targets['q_value'] = qvalue(targets['p_value'], pi0, use_pfdr)

    plot(plot_path, "global peptide scores", targets['pp'], decoys['pp'])

    accepted = targets[targets['q_value'] < peptide_fdr_threshold]
    return accepted['modified_peptide'], np.min(accepted['pp'])
def test_random(regtest):
    """Write ``pemp`` results for seeded random inputs of several sizes.

    For every combination of statistic length and null length, the two sizes
    and the ``pemp`` output are printed to ``regtest``, which is used as the
    ``file=`` target of ``print``.
    """
    lengths = (1, 2, 5, 10, 100)
    np.random.seed(1)
    for n_stat in lengths:
        for n_null in lengths:
            # The statistic is drawn before the null sample; this order must
            # not change or the seeded stream yields different arrays.
            observed = np.random.random((n_stat,))
            null_sample = np.random.random((n_null,))
            print(n_stat, n_null, file=regtest)
            print(pemp(observed, null_sample), file=regtest)
def test_random(regtest):
    """Print ``pemp`` output for seeded random vectors to ``regtest``.

    Iterates over all (statistic size, null size) pairs drawn from the same
    list of sizes, with the statistic size varying slowest.
    """
    sizes = (1, 2, 5, 10, 100)
    # Flattened equivalent of two nested loops: outer over the statistic
    # size, inner over the null size.
    size_pairs = [(outer, inner) for outer in sizes for inner in sizes]

    np.random.seed(1)
    for stat_len, null_len in size_pairs:
        # Draw order (statistic first, then null) is part of the seeded
        # sequence and is kept as-is.
        stat = np.random.random((stat_len,))
        stat0 = np.random.random((null_len,))
        print(stat_len, null_len, file=regtest)
        print(pemp(stat, stat0), file=regtest)
def test_1():
    """Compare ``pemp`` with hand-written expected values on a small example."""
    observed = np.array(
        [0, 1, 3, 2, 0.1, 0.5, 0.6, 0.3, 0.5, 0.6, 0.2, 0.5])
    null_sample = np.array([0.4, 0.2, 0.5, 1, 0.5, 0.7, 0.2, 0.4])
    # One expected value per entry of ``observed``, in the same order.
    expected = np.array([
        1.0, 0.125, 0.125, 0.125, 1.0, 0.25,
        0.25, 0.75, 0.25, 0.25, 0.75, 0.25,
    ])

    actual = pemp(observed, null_sample)

    np.testing.assert_almost_equal(actual, expected)
def protein_fdr(psms, protein_fdr_threshold, pi0_lambda, plot_path):
    """Select target proteins whose q-value is below a threshold.

    PSMs are collapsed to the maximum ``pp`` per ``protein_id`` and ``decoy``
    flag. Target p-values are obtained from ``pemp`` (target ``pp`` against
    decoy ``pp``) and turned into q-values by ``qvalue`` using the ``pi0``
    entry returned by ``pi0est``.

    Args:
        psms: DataFrame with ``protein_id``, ``decoy`` and ``pp`` columns.
            ``decoy`` is negated with ``~`` to select targets, so it is
            presumably boolean -- verify against the caller.
        protein_fdr_threshold: Exclusive upper bound on ``q_value``.
        pi0_lambda: Forwarded unchanged to ``pi0est``.
        plot_path: Forwarded unchanged to ``plot``.

    Returns:
        A 2-tuple: the ``protein_id`` column of the targets below the
        threshold, and ``np.min`` of their ``pp`` values.
    """
    # Fixed pi0 estimation / q-value settings for the protein level.
    estimator = 'bootstrap'
    smoothing_df = 3
    smooth_on_log_scale = False
    use_pfdr = False

    best_per_protein = (
        psms.groupby(['protein_id', 'decoy'])['pp'].max().reset_index()
    )
    decoy_flag = best_per_protein['decoy']
    targets = best_per_protein[~decoy_flag].copy()
    decoys = best_per_protein[decoy_flag].copy()

    targets['p_value'] = pemp(targets['pp'], decoys['pp'])
    pi0 = pi0est(targets['p_value'], pi0_lambda, estimator,
                 smoothing_df, smooth_on_log_scale)['pi0']
    targets['q_value'] = qvalue(targets['p_value'], pi0, use_pfdr)

    plot(plot_path, "global protein scores", targets['pp'], decoys['pp'])

    accepted = targets[targets['q_value'] < protein_fdr_threshold]
    return accepted['protein_id'], np.min(accepted['pp'])
def test_1():
    """Assert that ``pemp`` reproduces known values for fixed inputs."""
    stat_values = [0, 1, 3, 2, 0.1, 0.5, 0.6, 0.3, 0.5, 0.6, 0.2, 0.5]
    null_values = [0.4, 0.2, 0.5, 1, 0.5, 0.7, 0.2, 0.4]
    # Expected result, aligned position by position with ``stat_values``.
    wanted = [1.0, 0.125, 0.125, 0.125, 1.0, 0.25,
              0.25, 0.75, 0.25, 0.25, 0.75, 0.25]

    result = pemp(np.array(stat_values), np.array(null_values))

    np.testing.assert_almost_equal(result, np.array(wanted))
def combine_scores(self, scores):
    """Average scores per interaction group and recompute p- and q-values.

    ``scores`` is grouped by ``condition_id``, ``bait_id``, ``prey_id``,
    ``decoy`` and ``confidence_bin`` and the mean ``score`` is taken per
    group. For rows with ``decoy == 0`` a ``pvalue`` is computed by ``pemp``
    against the scores of rows with ``decoy == 1``, and a ``qvalue`` by
    ``qvalue`` using the ``pi0`` entry returned by ``pi0est``. Rows with
    ``decoy != 0`` are never assigned to, so they receive no p- or q-value.

    The number of unique ``(bait_id, prey_id)`` pairs with ``decoy == 0``
    below several q-value cutoffs is echoed via ``click.echo`` for both the
    input frame and the combined frame, followed by the pi0 estimate.

    Args:
        scores: DataFrame with the grouping columns listed above plus
            ``score`` and ``qvalue`` columns.

    Returns:
        The combined DataFrame: one row per group with the mean ``score``
        and the added ``pvalue`` and ``qvalue`` columns.
    """
    def count_unique_pairs(frame, cutoff):
        # Distinct (bait_id, prey_id) pairs among decoy == 0 rows whose
        # q-value is strictly below ``cutoff``.
        passing = frame[(frame['decoy'] == 0) & (frame['qvalue'] < cutoff)]
        return passing[['bait_id', 'prey_id']].drop_duplicates().shape[0]

    # Cutoffs reported in the summary; "%s" renders each exactly as written.
    report_cutoffs = (0.01, 0.05, 0.1, 0.2, 0.5)

    combined_scores = scores.groupby(
        ['condition_id', 'bait_id', 'prey_id', 'decoy', 'confidence_bin']
    )['score'].mean().reset_index()

    # Row masks are computed once; adding columns below does not change them.
    is_target = combined_scores['decoy'] == 0
    is_decoy = combined_scores['decoy'] == 1

    combined_scores.loc[is_target, 'pvalue'] = pemp(
        combined_scores[is_target]['score'],
        combined_scores[is_decoy]['score'])

    pi0_combined = pi0est(
        combined_scores[is_target]['pvalue'],
        self.pi0_lambda, self.pi0_method,
        self.pi0_smooth_df, self.pi0_smooth_log_pi0)['pi0']

    combined_scores.loc[is_target, 'qvalue'] = qvalue(
        combined_scores[is_target]['pvalue'], pi0_combined, self.pfdr)

    summaries = (
        ("Info: Unique interactions detected before integration:", scores),
        ("Info: Unique interactions detected after integration:",
         combined_scores),
    )
    for header, frame in summaries:
        click.echo(header)
        for cutoff in report_cutoffs:
            click.echo("%s (at q-value < %s)"
                       % (count_unique_pairs(frame, cutoff), cutoff))

    click.echo("Info: Combined pi0: %s." % pi0_combined)
    return combined_scores