Example #1
0
 def test_qvalues_from_dataframe(self):
     # q-values (formula=1, decoys kept) computed from a DataFrame built out of
     # self.psms; both the default and the full output go through _run_check.
     record_type = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
     frame = pd.DataFrame(np.array(list(self.psms), dtype=record_type))
     options = dict(key=self.key, is_decoy=self.is_decoy, remove_decoy=False, formula=1)
     compact = aux.qvalues(frame, **options)
     self._run_check(compact, 1)
     detailed = aux.qvalues(frame, full_output=True, **options)
     self._run_check(detailed, 1)
Example #2
0
 def test_qvalues_pep_from_dataframe_string_key_and_pep(self):
     # PEP-based q-values from a DataFrame, with `key` and `pep` both given as
     # column names rather than callables.
     record_type = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
     frame = pd.DataFrame(np.array(list(self.psms), dtype=record_type))
     columns = {'key': 'score', 'pep': 'pep'}
     compact = aux.qvalues(frame, **columns)
     self._run_check_pep(compact)
     detailed = aux.qvalues(frame, full_output=True, **columns)
     self._run_check_pep(detailed)
Example #3
0
 def test_qvalues_empty_dataframe(self):
     # An empty DataFrame must produce a zero-row result in both output modes.
     record_type = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
     empty_frame = pd.DataFrame(np.array([], dtype=record_type))
     options = dict(key=self.key, is_decoy=self.is_decoy, remove_decoy=False, formula=1)
     compact = aux.qvalues(empty_frame, **options)
     self.assertEqual(compact.shape[0], 0)
     detailed = aux.qvalues(empty_frame, full_output=True, **options)
     self.assertEqual(detailed.shape[0], 0)
Example #4
0
 def test_qvalues_pep_from_numpy(self):
     # PEP-based q-values from a structured NumPy array; the 'psm' field of the
     # full output is expected to keep the dtype of the input records.
     record_type = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
     records = np.array(list(self.psms), dtype=record_type)
     pep_only = aux.qvalues(records, pep=self.pep)
     self._run_check_pep(pep_only)
     detailed = aux.qvalues(records, key=self.key, pep=self.pep, full_output=True)
     self._run_check_pep(detailed)
     self.assertTrue(detailed['psm'].dtype == record_type)
Example #5
0
    def filter_evalue_new(self, FDR=1, FDR2=1, useMP=True, drop_decoy=True, toprint=True):
        """Filter PSMs by e-value and, optionally, by MP score at the given FDR levels.

        PSMs are processed separately for every input file returned by
        ``self.get_infiles()``. For each file the PSMs are first filtered by
        e-value at ``FDR`` percent. If ``useMP`` is true, the PSMs whose
        e-value is above the resulting cutoff are additionally filtered by MP
        score (``peptscore``) at ``FDR2`` percent. Every PSM that passes
        either step gets its q-value stored in its ``qval`` attribute and is
        added to the returned collection.

        Parameters
        ----------
        FDR : number
            FDR threshold for the e-value filtering, in percent.
        FDR2 : number
            FDR threshold for the MP score filtering, in percent.
        useMP : bool
            Whether to run the second, MP score based filtering step.
        drop_decoy : bool
            Whether to call ``filter_decoy()`` on the result before returning.
        toprint : bool
            Whether to log the actual FDR and the cutoff of every step.

        Returns
        -------
        tuple
            ``(new_peptides, best_cut_evalue, best_cut_peptscore)``. The two
            cutoffs are those of the last processed input file, or the
            defaults ``0`` and ``1.1`` when there are no input files.
        """
        def isdecoy(x):
            return x[0].note == 'decoy'

        def escore(x):
            return float(x[0].evalue)

        def mscore(x):
            # Negated so that the same ordering (reverse=False) can be used
            # for MP scores as for e-values.
            return -float(x[0].peptscore)

        # Keyword arguments shared by all aux.filter / aux.qvalues calls below.
        tda_kw = dict(is_decoy=isdecoy, remove_decoy=False, formula=1, full_output=True)

        # Bound up front so that the return statement is valid even when
        # get_infiles() yields nothing (previously an UnboundLocalError).
        best_cut_evalue = 0
        best_cut_peptscore = 1.1

        new_peptides = self.copy_empty()
        for infile in self.get_infiles():
            infile_peptides = [val for val in self.get_izip_full() if val[0].infile == infile]

            # Step 1: e-value based filtering.
            filtered_peptides = aux.filter(infile_peptides, fdr=float(FDR) / 100, key=escore, **tda_kw)
            qvals_e = aux.qvalues(filtered_peptides, key=escore, reverse=False, **tda_kw)
            try:
                best_cut_evalue = max(escore(p) for p in filtered_peptides)
                real_FDR = round(aux.fdr(filtered_peptides, is_decoy=isdecoy) * 100, 1)
            except Exception:
                # Best effort, e.g. nothing passed the filter: use the defaults.
                best_cut_evalue = 0
                real_FDR = 0
            if toprint:
                logger.info('%s %s e-value', real_FDR, best_cut_evalue)

            # Step 2 (optional): MP score filtering of PSMs rejected by step 1.
            best_cut_peptscore = 1.1
            qvals_m = []
            if useMP:
                tmp_peptides = [p for p in infile_peptides if escore(p) > best_cut_evalue]
                filtered_peptides = aux.filter(tmp_peptides, fdr=float(FDR2) / 100, key=mscore, **tda_kw)
                qvals_m = aux.qvalues(filtered_peptides, key=mscore, reverse=False, **tda_kw)
                try:
                    best_cut_peptscore = min(float(p[0].peptscore) for p in filtered_peptides)
                    real_FDR = round(aux.fdr(filtered_peptides, is_decoy=isdecoy) * 100, 1)
                except Exception:
                    best_cut_peptscore = 1.1
                    real_FDR = 0
                if toprint:
                    logger.info('%s %s MP score', real_FDR, best_cut_peptscore)

            # In every row the last item is the PSM tuple and the one before
            # it is the q-value; e-value results are added first, then MP ones.
            for qvals in (qvals_e, qvals_m):
                for val in qvals:
                    val[-1][0].qval = val[-2]
                    new_peptides.add_elem(val[-1])

        new_peptides.check_arrays()
        if drop_decoy:
            new_peptides.filter_decoy()
        return (new_peptides, best_cut_evalue, best_cut_peptscore)
Example #6
0
 def test_qvalues_from_numpy(self):
     # q-values (formula=1, decoys kept) from a structured NumPy array; the
     # 'psm' field of the full output is expected to keep the input dtype.
     record_type = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
     records = np.array(list(self.psms), dtype=record_type)
     options = dict(key=self.key, is_decoy=self.is_decoy, remove_decoy=False, formula=1)
     compact = aux.qvalues(records, **options)
     self._run_check(compact, 1)
     detailed = aux.qvalues(records, full_output=True, **options)
     self._run_check(detailed, 1)
     self.assertTrue(detailed['psm'].dtype == record_type)
Example #7
0
 def test_qvalues_from_dataframe_string_key_and_is_decoy(self):
     # `key` and `is_decoy` are given as DataFrame column names; the decoy
     # column is precomputed row by row with self.is_decoy.
     record_type = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
     frame = pd.DataFrame(np.array(list(self.psms), dtype=record_type))
     decoy_flags = [self.is_decoy(record) for _, record in frame.iterrows()]
     frame['is decoy'] = decoy_flags
     options = dict(key='score', is_decoy='is decoy', remove_decoy=False, formula=1)
     compact = aux.qvalues(frame, **options)
     self._run_check(compact, 1)
     detailed = aux.qvalues(frame, full_output=True, **options)
     self._run_check(detailed, 1)
Example #8
0
 def test_qvalues_pep_from_numpy_string_pep(self):
     # PEP-based q-values from a structured NumPy array with `pep` (and
     # optionally `key`) given as field names.
     record_type = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
     records = np.array(list(self.psms), dtype=record_type)
     variants = (
         {'pep': 'pep'},
         {'key': 'score', 'pep': 'pep'},
         {'key': 'score', 'pep': 'pep', 'full_output': True},
     )
     for options in variants:
         self._run_check_pep(aux.qvalues(records, **options))
Example #9
0
def calc_qvals(df, ratio):
    """Compute q-values for the rows of `df` not flagged as `decoy1`, in place.

    Rows where ``df['decoy1']`` is false are passed to ``aux.qvalues`` twice,
    scored by the 'ML score' column with 'decoy2' as the decoy flag: once
    with ``correction=1`` (stored in column 'q') and once with
    ``correction=0`` (stored in column 'q_uncorrected'). Rows flagged as
    `decoy1` get ``None`` in both columns.
    """
    logger.debug('Q-value calculation started...')
    shared = dict(key='ML score', is_decoy='decoy2', remove_decoy=False,
        formula=1, full_output=True, ratio=ratio)
    corrected = aux.qvalues(df[~df['decoy1']], correction=1, **shared)
    uncorrected = aux.qvalues(df[~df['decoy1']], correction=0, **shared)

    is_decoy1 = df['decoy1']
    not_decoy1 = ~is_decoy1
    df.loc[not_decoy1, 'q'] = corrected['q']
    df.loc[not_decoy1, 'q_uncorrected'] = uncorrected['q']
    for column in ('q', 'q_uncorrected'):
        df.loc[is_decoy1, column] = None
Example #10
0
 def test_filter_empty_dataframe_str_key_str_is_decoy(self):
     # Empty DataFrame with `key` and `is_decoy` given as column names: both
     # calls must return a zero-row result.
     empty_frame = pd.DataFrame({'score': [], 'is decoy': []})
     columns = dict(key='score', is_decoy='is decoy')
     filtered = aux.filter(empty_frame, fdr=0.1, **columns)
     self.assertEqual(filtered.shape[0], 0)
     # NOTE(review): this call goes to aux.qvalues with an `fdr` argument,
     # although the test is named for filter -- confirm whether aux.filter
     # was intended here.
     scored = aux.qvalues(empty_frame, remove_decoy=False, formula=1, full_output=True, fdr=0.01, **columns)
     self.assertEqual(scored.shape[0], 0)
Example #11
0
def filter_custom(df, fdr, key, is_decoy, reverse, remove_decoy, ratio, formula, correction=None, loglabel=None):
    """Return the rows of a copy of `df` whose q-value is below `fdr`.

    Two q-value columns are added to the copy: 'q' (``correction=1``) and
    'q_uncorrected' (``correction=0``). The column used for filtering is:

    - chosen by the truthiness of `correction` when it is not None;
    - otherwise 'q' if its minimum is below `fdr`, else 'q_uncorrected'.

    If `remove_decoy` is true, rows where the `is_decoy` column is true are
    dropped before the threshold is applied (here `is_decoy` is used as a
    column label). `loglabel` only appears in log messages.
    """
    shared = {
        'key': key,
        'is_decoy': is_decoy,
        'reverse': reverse,
        'full_output': True,
        'remove_decoy': False,
        'ratio': ratio,
        'formula': formula,
    }
    df = df.copy()
    corrected = aux.qvalues(df, correction=1, **shared)
    uncorrected = aux.qvalues(df, correction=0, **shared)
    df['q'] = corrected['q']
    df['q_uncorrected'] = uncorrected['q']

    if correction is None:
        # No explicit choice: prefer the corrected values if they pass at all.
        if df['q'].min() < fdr:
            logger.debug('Successfully filtered with +1 correction (label = %s).', loglabel)
            qlabel = 'q'
        else:
            logger.info('No results for filtering with +1 correction (label = %s). Rerunning without correction...', loglabel)
            qlabel = 'q_uncorrected'
    else:
        if correction:
            qlabel = 'q'
        else:
            qlabel = 'q_uncorrected'
        logger.debug('Explicitly using %s for filtering.', qlabel)

    if remove_decoy:
        decoy_rows = df[is_decoy]
        df = df[~decoy_rows]
    passing = df[qlabel] < fdr
    return df[passing].copy()
Example #12
0
 def _read_pin_from_peprec(self, path_to_peprec):
     """Read a space-separated PEPREC table, compute q-values and return selected columns.

     The 'psm_score' column is used as the score (``reverse=True``) and rows
     with ``Label == -1`` are treated as decoys. The returned DataFrame has the
     columns 'PSMId' (renamed from 'spec_id'), 'is decoy', 'score', 'q' and
     'peptide'.
     """
     peprec = pd.read_table(path_to_peprec, sep=" ")
     scores = peprec["psm_score"]
     decoy_mask = peprec["Label"] == -1
     full_result = qvalues(
         peprec,
         key=scores,
         is_decoy=decoy_mask,
         reverse=True,
         remove_decoy=False,
         formula=1,
         full_output=True,
     )
     pin_qvalues = pd.DataFrame(full_result)
     wanted = ["spec_id", "is decoy", "score", "q", "peptide"]
     return pin_qvalues[wanted].rename(columns={"spec_id": "PSMId"})
Example #13
0
def filter_evalue_prots(prots, FDR=1.0, remove_decoy=True, dec_prefix='DECOY_'):
    """Filter the proteins in `prots` by their 'expect' value at `FDR` percent.

    `prots` maps protein names to dicts that hold an 'expect' entry; names
    starting with `dec_prefix` are treated as decoys. Each protein that passes
    the filter gets its q-value stored under 'qval' in its dict. The returned
    dict contains the passing proteins, without decoys if `remove_decoy` is
    true. The actual protein-level FDR is logged.
    """
    def isdecoy(entry):
        return entry[0].startswith(dec_prefix)

    def escore(entry):
        return float(entry[1]['expect'])

    proteins = prots.items()
    tda_kw = dict(key=escore, is_decoy=isdecoy, remove_decoy=False, formula=1, full_output=True)
    filtered_proteins = aux.filter(proteins, fdr=float(FDR) / 100, **tda_kw)
    qvals_e = aux.qvalues(filtered_proteins, reverse=False, **tda_kw)

    new_prots = {}
    for row in qvals_e:
        # Last item of a row is the (name, info) pair, the one before it the q-value.
        entry = row[-1]
        entry[1]['qval'] = row[-2]
        if remove_decoy and entry[0].startswith(dec_prefix):
            continue
        new_prots[entry[0]] = entry[1]
    actual_fdr = aux.fdr(filtered_proteins, is_decoy=isdecoy) * 100
    logger.info('Actual protein-level FDR = %.2f%%', actual_fdr)
    return new_prots
Example #14
0
 def _read_pin_file(self, path_to_pin):
     """Load a pin file, compute q-values and return them as one pandas DataFrame.

     The column named by ``self.score_metric`` is used as the score
     (``reverse=True``) and rows with ``Label == -1`` are treated as decoys.
     The result has the columns 'PSMId' (from 'SpecId'), 'is decoy', 'score',
     'q' and 'peptide' (from 'Peptide').
     """
     pin = PercolatorIn(path_to_pin)
     scores = pin.df[self.score_metric]
     decoy_mask = pin.df["Label"] == -1
     full_result = qvalues(
         pin.df,
         key=scores,
         is_decoy=decoy_mask,
         reverse=True,
         remove_decoy=False,
         formula=1,
         full_output=True,
     )
     pin_qvalues = pd.DataFrame(full_result)
     wanted = ["SpecId", "is decoy", "score", "q", "Peptide"]
     new_names = {"SpecId": "PSMId", "Peptide": "peptide"}
     return pin_qvalues[wanted].rename(columns=new_names)
Example #15
0
 def test_qvalues_pep_full_output(self):
     # Full-output PEP-based q-values, first without and then with an explicit key.
     without_key = aux.qvalues(self.psms, pep=self.pep, full_output=True)
     self._run_check_pep(without_key)
     with_key = aux.qvalues(self.psms, key=self.key, pep=self.pep, full_output=True)
     self._run_check_pep(with_key)
Example #16
0
 def test_qvalues_from_tandem(self):
     # q-values computed from a TandemXML reader must be the same whether or
     # not the reader is used as a context manager.
     plain_reader = tandem.TandemXML('test.t.xml')
     q_plain = aux.qvalues(plain_reader, key=op.itemgetter('expect'), is_decoy=tandem.is_decoy)
     with tandem.TandemXML('test.t.xml') as managed_reader:
         q_managed = aux.qvalues(managed_reader, key=op.itemgetter('expect'), is_decoy=tandem.is_decoy)
     same_q = np.allclose(q_plain['q'], q_managed['q'])
     self.assertTrue(same_q)
Example #17
0
 def test_qvalues(self):
     # With decoys removed, all q-values and decoy flags are expected to be
     # zero and the scores to be 0..25.
     result = aux.qvalues(self.psms, key=self.key, is_decoy=self.is_decoy, remove_decoy=True)
     expectations = (('q', 0), ('is decoy', 0), ('score', np.arange(26)))
     for field, expected in expectations:
         self.assertTrue(np.allclose(result[field], expected))
Example #18
0
 def test_qvalues_pep(self):
     # PEP-based q-values, first without and then with an explicit key.
     without_key = aux.qvalues(self.psms, pep=self.pep)
     self._run_check_pep(without_key)
     with_key = aux.qvalues(self.psms, pep=self.pep, key=self.key)
     self._run_check_pep(with_key)
Example #19
0
 def test_qvalues_with_decoy(self):
     # Decoys are kept; checked once with the default formula (validated as 2)
     # and once with formula=1.
     options = dict(key=self.key, is_decoy=self.is_decoy, remove_decoy=False)
     default_formula = aux.qvalues(self.psms, **options)
     self._run_check(default_formula, 2)
     first_formula = aux.qvalues(self.psms, formula=1, **options)
     self._run_check(first_formula, 1)
Example #20
0
 def test_qvalues_full_output(self):
     q = aux.qvalues(self.psms, key=self.key, is_decoy=self.is_decoy, remove_decoy=False, full_output=True)
     self._run_check(q, 2)