Esempio n. 1
0
def peptide_fdr(psms, peptide_fdr_threshold, pi0_lambda, plot_path, nofdr):
    """Select target peptides whose q-value lies below a threshold.

    The PSM table is collapsed to one row per group, keeping the maximum
    ``pp`` of each group.  When ``nofdr`` is truthy, the ``q_value`` column
    already present in ``psms`` is reused (it becomes part of the grouping
    key).  Otherwise p-values and q-values are derived for the targets via
    ``pemp``, ``pi0est`` and ``qvalue``, and the target/decoy scores are
    handed to ``plot``.

    Args:
        psms: DataFrame with ``modified_peptide``, ``decoy`` and ``pp``
            columns (plus ``q_value`` when ``nofdr`` is truthy).  ``decoy``
            is used as a boolean mask.
        peptide_fdr_threshold: Exclusive upper bound on ``q_value``.
        pi0_lambda: Forwarded unchanged to ``pi0est``.
        plot_path: Forwarded unchanged to ``plot``.
        nofdr: When truthy, skip the p-/q-value computation and plotting.

    Returns:
        A tuple ``(peptides, min_pp)``: the ``modified_peptide`` column of
        the targets passing the threshold and the smallest ``pp`` among them.
    """
    group_columns = ['modified_peptide', 'decoy']
    if nofdr:
        # Carrying q_value in the key keeps the precomputed value per row.
        group_columns.append('q_value')

    best_scores = psms.groupby(group_columns)['pp'].max().reset_index()
    decoy_mask = best_scores['decoy']
    targets = best_scores[~decoy_mask].copy()
    decoys = best_scores[decoy_mask].copy()

    if not nofdr:
        # Fixed settings passed positionally to pi0est / qvalue.
        pi0_method = 'bootstrap'
        pi0_smooth_df = 3
        pi0_smooth_log_pi0 = False
        pfdr = False

        targets['p_value'] = pemp(targets['pp'], decoys['pp'])
        p_values = targets['p_value']
        pi0 = pi0est(p_values, pi0_lambda, pi0_method, pi0_smooth_df,
                     pi0_smooth_log_pi0)['pi0']
        targets['q_value'] = qvalue(p_values, pi0, pfdr)

        plot(plot_path, "global peptide scores", targets['pp'], decoys['pp'])

    passing = targets[targets['q_value'] < peptide_fdr_threshold]
    return passing['modified_peptide'], np.min(passing['pp'])
Esempio n. 2
0
def test_qvalue(tmpdir, regtest):
    """Check ``qvalue`` against stored reference q-values.

    The reference CSV is copied into the per-test temporary directory and
    read back sorted by its ``p`` column.  ``qvalue`` is then evaluated with
    ``pfdr=False`` and ``pfdr=True`` and compared, to four decimals, with
    the ``q_default`` and ``q_pfdr`` columns respectively.

    Args:
        tmpdir: pytest temporary-directory fixture receiving the data copy.
        regtest: Requested fixture; not used by the assertions below.
    """
    data_path = os.path.join(DATA_FOLDER, "test_qvalue_ref_data.csv")
    # Read the copy through its explicit path instead of os.chdir()-ing into
    # tmpdir: a chdir that is never undone changes the working directory for
    # every test that runs afterwards in the same process.
    local_copy = shutil.copy(data_path, tmpdir.strpath)

    stat = pd.read_csv(local_copy, delimiter=',').sort_values("p")

    # Second positional argument of qvalue, fixed to match the reference run.
    reference_pi0 = 0.669926026474838

    # For comparison with R/bioconductor reference implementation
    for pfdr, expected_column in ((False, 'q_default'), (True, 'q_pfdr')):
        np.testing.assert_almost_equal(qvalue(stat['p'],
                                              reference_pi0,
                                              pfdr=pfdr),
                                       stat[expected_column].values,
                                       decimal=4)
Esempio n. 3
0
def protein_fdr(psms, protein_fdr_threshold, pi0_lambda, plot_path):
    """Select target proteins whose q-value lies below a threshold.

    The PSM table is collapsed to one row per ``(protein_id, decoy)`` pair,
    keeping the maximum ``pp``.  p-values and q-values are derived for the
    targets via ``pemp``, ``pi0est`` and ``qvalue``, and the target/decoy
    scores are handed to ``plot``.

    Args:
        psms: DataFrame with ``protein_id``, ``decoy`` and ``pp`` columns.
            ``decoy`` is used as a boolean mask.
        protein_fdr_threshold: Exclusive upper bound on ``q_value``.
        pi0_lambda: Forwarded unchanged to ``pi0est``.
        plot_path: Forwarded unchanged to ``plot``.

    Returns:
        A tuple ``(proteins, min_pp)``: the ``protein_id`` column of the
        targets passing the threshold and the smallest ``pp`` among them.
    """
    # Fixed settings passed positionally to pi0est / qvalue.
    pi0_method = 'bootstrap'
    pi0_smooth_df = 3
    pi0_smooth_log_pi0 = False
    pfdr = False

    best_scores = psms.groupby(['protein_id', 'decoy'])['pp'].max()
    best_scores = best_scores.reset_index()
    decoy_mask = best_scores['decoy']
    targets = best_scores[~decoy_mask].copy()
    decoys = best_scores[decoy_mask].copy()

    targets['p_value'] = pemp(targets['pp'], decoys['pp'])
    p_values = targets['p_value']
    pi0 = pi0est(p_values, pi0_lambda, pi0_method, pi0_smooth_df,
                 pi0_smooth_log_pi0)['pi0']
    targets['q_value'] = qvalue(p_values, pi0, pfdr)

    plot(plot_path, "global protein scores", targets['pp'], decoys['pp'])

    passing = targets[targets['q_value'] < protein_fdr_threshold]
    return passing['protein_id'], np.min(passing['pp'])
Esempio n. 4
0
    def combine_scores(self, scores):
        """Average scores per interaction group and attach p-/q-values.

        ``scores`` is grouped by ``condition_id``, ``bait_id``, ``prey_id``,
        ``decoy`` and ``confidence_bin`` and the mean ``score`` of each group
        is kept.  For the rows with ``decoy == 0`` a ``pvalue`` column is
        filled from ``pemp`` (using the ``decoy == 1`` scores as second
        argument) and a ``qvalue`` column from ``qvalue`` with the ``pi0``
        entry returned by ``pi0est``.  Counts of unique ``(bait_id,
        prey_id)`` pairs below several q-value cutoffs are echoed for both
        the input table and the combined table, followed by the pi0 value.

        Args:
            scores: DataFrame with the grouping columns listed above plus
                ``score`` and ``qvalue`` columns.

        Returns:
            The combined DataFrame; ``pvalue`` and ``qvalue`` are only
            assigned on rows where ``decoy == 0``.
        """
        report_thresholds = (0.01, 0.05, 0.1, 0.2, 0.5)

        def count_unique_interactions(table, threshold):
            # Distinct (bait_id, prey_id) pairs among non-decoy rows whose
            # q-value is strictly below the threshold.
            passing = (table['decoy'] == 0) & (table['qvalue'] < threshold)
            pairs = table[passing][['bait_id', 'prey_id']]
            return pairs.drop_duplicates().shape[0]

        def report_unique_interactions(table):
            # One echoed line per cutoff, e.g. "12 (at q-value < 0.01)".
            for threshold in report_thresholds:
                click.echo("%s (at q-value < %s)" %
                           (count_unique_interactions(table, threshold),
                            threshold))

        combined_scores = scores.groupby(
            ['condition_id', 'bait_id', 'prey_id', 'decoy',
             'confidence_bin'])['score'].mean().reset_index()

        # The decoy column is not modified below, so both masks stay valid.
        is_target = combined_scores['decoy'] == 0
        is_decoy = combined_scores['decoy'] == 1

        combined_scores.loc[is_target, 'pvalue'] = pemp(
            combined_scores[is_target]['score'],
            combined_scores[is_decoy]['score'])

        pi0_combined = pi0est(
            combined_scores[is_target]['pvalue'],
            self.pi0_lambda, self.pi0_method, self.pi0_smooth_df,
            self.pi0_smooth_log_pi0)['pi0']
        combined_scores.loc[is_target, 'qvalue'] = qvalue(
            combined_scores[is_target]['pvalue'],
            pi0_combined, self.pfdr)

        click.echo("Info: Unique interactions detected before integration:")
        report_unique_interactions(scores)

        click.echo("Info: Unique interactions detected after integration:")
        report_unique_interactions(combined_scores)
        click.echo("Info: Combined pi0: %s." % pi0_combined)

        return combined_scores