def test_qvalues_from_dataframe(self):
    """Run ``aux.qvalues`` on a DataFrame built from ``self.psms``.

    The result is validated with ``self._run_check`` both for the default
    output and for ``full_output=True``.
    """
    record_dtype = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
    frame = pd.DataFrame(np.array(list(self.psms), dtype=record_dtype))
    # Arguments shared by both calls; only ``full_output`` differs.
    common = dict(key=self.key, is_decoy=self.is_decoy, remove_decoy=False, formula=1)

    self._run_check(aux.qvalues(frame, **common), 1)
    self._run_check(aux.qvalues(frame, full_output=True, **common), 1)
def test_qvalues_pep_from_dataframe_string_key_and_pep(self):
    """Run PEP-based ``aux.qvalues`` on a DataFrame using column names.

    Both ``key`` and ``pep`` are given as strings ('score' and 'pep'); the
    result is validated with ``self._run_check_pep`` with and without
    ``full_output=True``.
    """
    record_dtype = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
    frame = pd.DataFrame(np.array(list(self.psms), dtype=record_dtype))

    # First pass uses the default output, second pass requests full output.
    for extra in ({}, {'full_output': True}):
        self._run_check_pep(aux.qvalues(frame, key='score', pep='pep', **extra))
def test_qvalues_empty_dataframe(self):
    """Check that ``aux.qvalues`` on an empty DataFrame yields zero rows.

    Verified for the default output and for ``full_output=True``.
    """
    record_dtype = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
    empty_frame = pd.DataFrame(np.array([], dtype=record_dtype))
    common = dict(key=self.key, is_decoy=self.is_decoy, remove_decoy=False, formula=1)

    plain = aux.qvalues(empty_frame, **common)
    self.assertEqual(plain.shape[0], 0)

    full = aux.qvalues(empty_frame, full_output=True, **common)
    self.assertEqual(full.shape[0], 0)
def test_qvalues_pep_from_numpy(self):
    """Run PEP-based ``aux.qvalues`` on a structured NumPy array.

    The first call passes only ``pep``; the second also passes ``key`` and
    requests ``full_output=True``, after which the dtype of the ``'psm'``
    field is compared with the input dtype.
    """
    record_dtype = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
    records = np.array(list(self.psms), dtype=record_dtype)

    self._run_check_pep(aux.qvalues(records, pep=self.pep))

    full = aux.qvalues(records, key=self.key, pep=self.pep, full_output=True)
    self._run_check_pep(full)
    self.assertTrue(full['psm'].dtype == record_dtype)
def filter_evalue_new(self, FDR=1, FDR2=1, useMP=True, drop_decoy=True, toprint=True):
    """Filter PSMs by e-value and, optionally, by MP score at a given FDR.

    PSMs are processed separately for every file returned by
    ``self.get_infiles()``. For each file the PSMs are first filtered by
    e-value; when ``useMP`` is true, the PSMs whose e-value exceeds the
    e-value cutoff are filtered a second time by MP score (``peptscore``,
    negated so that it can be used as a key). Every PSM that passes gets its
    q-value stored in the ``qval`` attribute and is added to a new container
    created with ``self.copy_empty()``.

    Parameters
    ----------
    FDR : number
        FDR threshold for the e-value filter, in percent (divided by 100
        before being passed to ``aux.filter``).
    FDR2 : number
        FDR threshold for the MP-score filter, in percent.
    useMP : bool
        Whether to run the second, MP-score based filtering step.
    drop_decoy : bool
        If true, ``filter_decoy()`` is called on the result before returning.
    toprint : bool
        If true, the FDR and cutoff of each step are logged.

    Returns
    -------
    tuple
        ``(new_peptides, best_cut_evalue, best_cut_peptscore)``. The two
        cutoffs are those of the last processed file; when there are no
        files they are ``0`` and ``1.1`` respectively.
    """
    def isdecoy(x):
        return x[0].note == 'decoy'

    def escore(x):
        return float(x[0].evalue)

    def mscore(x):
        return -float(x[0].peptscore)

    new_peptides = self.copy_empty()

    # Defaults so that the return statement is well-defined even when
    # ``get_infiles()`` yields nothing.
    best_cut_evalue = 0
    best_cut_peptscore = 1.1

    for infile in self.get_infiles():
        infile_peptides = [val for val in self.get_izip_full() if val[0].infile == infile]

        filtered_peptides = aux.filter(
            infile_peptides, fdr=float(FDR) / 100, key=escore, is_decoy=isdecoy,
            remove_decoy=False, formula=1, full_output=True)
        qvals_e = aux.qvalues(
            filtered_peptides, key=escore, is_decoy=isdecoy, reverse=False,
            remove_decoy=False, formula=1, full_output=True)
        try:
            best_cut_evalue = max(escore(p) for p in filtered_peptides)
            real_FDR = round(aux.fdr(filtered_peptides, is_decoy=isdecoy) * 100, 1)
        except Exception:
            # Best effort: e.g. nothing passed the filter, so max() has no
            # items. Fall back to neutral values instead of failing.
            best_cut_evalue = 0
            real_FDR = 0
        if toprint:
            logger.info('%s %s e-value', real_FDR, best_cut_evalue)

        best_cut_peptscore = 1.1
        if useMP:
            # Only PSMs that did not pass the e-value cutoff are rescued by MP score.
            tmp_peptides = [p for p in infile_peptides if escore(p) > best_cut_evalue]
            filtered_peptides = aux.filter(
                tmp_peptides, fdr=float(FDR2) / 100, key=mscore, is_decoy=isdecoy,
                remove_decoy=False, formula=1, full_output=True)
            qvals_m = aux.qvalues(
                filtered_peptides, key=mscore, is_decoy=isdecoy, reverse=False,
                remove_decoy=False, formula=1, full_output=True)
            try:
                best_cut_peptscore = min(float(p[0].peptscore) for p in filtered_peptides)
                real_FDR = round(aux.fdr(filtered_peptides, is_decoy=isdecoy) * 100, 1)
            except Exception:
                # Same best-effort fallback as for the e-value step.
                best_cut_peptscore = 1.1
                real_FDR = 0
            if toprint:
                logger.info('%s %s MP score', real_FDR, best_cut_peptscore)

        # In each full-output row the last item is the original PSM and the
        # one before it is used as its q-value.
        for val in qvals_e:
            val[-1][0].qval = val[-2]
            new_peptides.add_elem(val[-1])
        if useMP:
            for val in qvals_m:
                val[-1][0].qval = val[-2]
                new_peptides.add_elem(val[-1])

    new_peptides.check_arrays()
    if drop_decoy:
        new_peptides.filter_decoy()
    return (new_peptides, best_cut_evalue, best_cut_peptscore)
def test_qvalues_from_numpy(self):
    """Run ``aux.qvalues`` on a structured NumPy array built from ``self.psms``.

    Both the default and the ``full_output=True`` results are validated with
    ``self._run_check``; for the full output the dtype of the ``'psm'`` field
    is also compared with the input dtype.
    """
    record_dtype = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
    records = np.array(list(self.psms), dtype=record_dtype)
    common = dict(key=self.key, is_decoy=self.is_decoy, remove_decoy=False, formula=1)

    self._run_check(aux.qvalues(records, **common), 1)

    full = aux.qvalues(records, full_output=True, **common)
    self._run_check(full, 1)
    self.assertTrue(full['psm'].dtype == record_dtype)
def test_qvalues_from_dataframe_string_key_and_is_decoy(self):
    """Run ``aux.qvalues`` on a DataFrame with ``key``/``is_decoy`` as column names.

    An ``'is decoy'`` column is first computed row by row with
    ``self.is_decoy``; results are validated with ``self._run_check`` with
    and without ``full_output=True``.
    """
    record_dtype = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
    frame = pd.DataFrame(np.array(list(self.psms), dtype=record_dtype))

    decoy_flags = []
    for _, row in frame.iterrows():
        decoy_flags.append(self.is_decoy(row))
    frame['is decoy'] = decoy_flags

    common = dict(key='score', is_decoy='is decoy', remove_decoy=False, formula=1)
    self._run_check(aux.qvalues(frame, **common), 1)
    self._run_check(aux.qvalues(frame, full_output=True, **common), 1)
def test_qvalues_pep_from_numpy_string_pep(self):
    """Run PEP-based ``aux.qvalues`` on a structured array using field names.

    Three argument combinations are validated with ``self._run_check_pep``:
    ``pep`` only, ``key`` + ``pep``, and ``key`` + ``pep`` with
    ``full_output=True``.
    """
    record_dtype = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
    records = np.array(list(self.psms), dtype=record_dtype)

    argument_sets = (
        dict(pep='pep'),
        dict(key='score', pep='pep'),
        dict(key='score', pep='pep', full_output=True),
    )
    for kwargs in argument_sets:
        self._run_check_pep(aux.qvalues(records, **kwargs))
def calc_qvals(df, ratio):
    """Add ``'q'`` and ``'q_uncorrected'`` columns to ``df`` in place.

    Q-values are computed with ``aux.qvalues`` on the rows where ``'decoy1'``
    is false, keyed by ``'ML score'`` with ``'decoy2'`` as the decoy flag:
    once with ``correction=1`` (stored in ``'q'``) and once with
    ``correction=0`` (stored in ``'q_uncorrected'``). Rows where ``'decoy1'``
    is true get ``None`` in both columns. Nothing is returned.
    """
    logger.debug('Q-value calculation started...')

    decoy1_rows = df['decoy1']
    other_rows = ~decoy1_rows
    shared = dict(key='ML score', is_decoy='decoy2', remove_decoy=False,
                  formula=1, full_output=True, ratio=ratio)

    # Both q-value runs happen before any column of ``df`` is written.
    corrected = aux.qvalues(df[other_rows], correction=1, **shared)
    uncorrected = aux.qvalues(df[other_rows], correction=0, **shared)

    df.loc[other_rows, 'q'] = corrected['q']
    df.loc[other_rows, 'q_uncorrected'] = uncorrected['q']
    df.loc[decoy1_rows, 'q'] = None
    df.loc[decoy1_rows, 'q_uncorrected'] = None
def test_filter_empty_dataframe_str_key_str_is_decoy(self):
    """Check that an empty DataFrame with string ``key``/``is_decoy`` yields zero rows."""
    empty_frame = pd.DataFrame({'score': [], 'is decoy': []})

    filtered = aux.filter(empty_frame, key='score', is_decoy='is decoy', fdr=0.1)
    self.assertEqual(filtered.shape[0], 0)

    # NOTE(review): this second call goes to aux.qvalues (not aux.filter)
    # while still passing ``fdr`` -- confirm that this is intentional.
    scored = aux.qvalues(empty_frame, key='score', is_decoy='is decoy',
                         remove_decoy=False, formula=1, full_output=True, fdr=0.01)
    self.assertEqual(scored.shape[0], 0)
def filter_custom(df, fdr, key, is_decoy, reverse, remove_decoy, ratio, formula, correction=None, loglabel=None):
    """Return the rows of ``df`` whose q-value is below ``fdr``.

    A copy of ``df`` receives two columns: ``'q'`` (``aux.qvalues`` with
    ``correction=1``) and ``'q_uncorrected'`` (``correction=0``). The column
    used for filtering is chosen as follows:

    * ``correction`` given: ``'q'`` if it is truthy, else ``'q_uncorrected'``;
    * otherwise ``'q'`` if its minimum is below ``fdr``;
    * otherwise ``'q_uncorrected'``.

    If ``remove_decoy`` is true, rows flagged by the ``is_decoy`` column are
    dropped before the threshold is applied. ``loglabel`` only appears in
    log messages. The input frame is not modified.
    """
    qvalue_args = {
        'key': key,
        'is_decoy': is_decoy,
        'reverse': reverse,
        'full_output': True,
        'remove_decoy': False,
        'ratio': ratio,
        'formula': formula,
    }
    df = df.copy()
    corrected = aux.qvalues(df, correction=1, **qvalue_args)
    uncorrected = aux.qvalues(df, correction=0, **qvalue_args)
    df['q'] = corrected['q']
    df['q_uncorrected'] = uncorrected['q']

    if correction is not None:
        if correction:
            qlabel = 'q'
        else:
            qlabel = 'q_uncorrected'
        logger.debug('Explicitly using %s for filtering.', qlabel)
    elif df['q'].min() < fdr:
        logger.debug('Successfully filtered with +1 correction (label = %s).', loglabel)
        qlabel = 'q'
    else:
        logger.info('No results for filtering with +1 correction (label = %s). Rerunning without correction...', loglabel)
        qlabel = 'q_uncorrected'

    if remove_decoy:
        df = df[~df[is_decoy]]
    passing = df[qlabel] < fdr
    return df[passing].copy()
def _read_pin_from_peprec(self, path_to_peprec):
    """Read a space-separated peprec table and return its PSMs with q-values.

    Q-values are computed from the ``psm_score`` column, treating rows with
    ``Label == -1`` as decoys. The returned DataFrame holds the columns
    ``PSMId`` (renamed from ``spec_id``), ``is decoy``, ``score``, ``q`` and
    ``peptide``.
    """
    peprec = pd.read_table(path_to_peprec, sep=" ")
    decoy_mask = peprec["Label"] == -1
    scored = qvalues(
        peprec,
        key=peprec["psm_score"],
        is_decoy=decoy_mask,
        reverse=True,
        remove_decoy=False,
        formula=1,
        full_output=True,
    )
    pin_qvalues = pd.DataFrame(scored)
    wanted_columns = ["spec_id", "is decoy", "score", "q", "peptide"]
    return pin_qvalues[wanted_columns].rename(columns={"spec_id": "PSMId"})
def filter_evalue_prots(prots, FDR=1.0, remove_decoy=True, dec_prefix='DECOY_'):
    """Filter a protein dictionary by e-value at the given FDR.

    ``prots`` maps a protein name to a dict that contains an ``'expect'``
    entry; names starting with ``dec_prefix`` are treated as decoys. ``FDR``
    is given in percent. Each protein that passes the filter gets its
    q-value stored under ``'qval'`` in its dict (the dicts are modified in
    place). The returned dict contains the passing proteins, without decoys
    when ``remove_decoy`` is true. The actual FDR of the filtered set is
    logged.
    """
    def is_decoy_entry(entry):
        return entry[0].startswith(dec_prefix)

    def expect_value(entry):
        return float(entry[1]['expect'])

    entries = prots.items()
    filtered_proteins = aux.filter(
        entries, fdr=float(FDR) / 100, key=expect_value, is_decoy=is_decoy_entry,
        remove_decoy=False, formula=1, full_output=True)
    scored = aux.qvalues(
        filtered_proteins, key=expect_value, is_decoy=is_decoy_entry, reverse=False,
        remove_decoy=False, formula=1, full_output=True)

    new_prots = {}
    for row in scored:
        # The last item of a row is the original (name, info) pair and the
        # item before it is stored as the q-value.
        row[-1][1]['qval'] = row[-2]
        if remove_decoy and row[-1][0].startswith(dec_prefix):
            continue
        new_prots[row[-1][0]] = row[-1][1]

    actual_fdr = aux.fdr(filtered_proteins, is_decoy=is_decoy_entry) * 100
    logger.info('Actual protein-level FDR = %.2f%%', actual_fdr)
    return new_prots
def _read_pin_file(self, path_to_pin):
    """Load a pin file and return its PSMs with q-values as one DataFrame.

    The file is read through ``PercolatorIn``. Q-values are computed from
    the column named by ``self.score_metric``, treating rows with
    ``Label == -1`` as decoys. The returned DataFrame holds the columns
    ``PSMId`` (renamed from ``SpecId``), ``is decoy``, ``score``, ``q`` and
    ``peptide`` (renamed from ``Peptide``).
    """
    pin = PercolatorIn(path_to_pin)
    decoy_mask = pin.df["Label"] == -1
    scored = qvalues(
        pin.df,
        key=pin.df[self.score_metric],
        is_decoy=decoy_mask,
        reverse=True,
        remove_decoy=False,
        formula=1,
        full_output=True,
    )
    pin_qvalues = pd.DataFrame(scored)
    wanted_columns = ["SpecId", "is decoy", "score", "q", "Peptide"]
    renames = {"SpecId": "PSMId", "Peptide": "peptide"}
    return pin_qvalues[wanted_columns].rename(columns=renames)
def test_qvalues_pep_full_output(self):
    """Validate PEP-based ``aux.qvalues`` with ``full_output=True``.

    Checked once without ``key`` and once with ``key=self.key``.
    """
    without_key = aux.qvalues(self.psms, pep=self.pep, full_output=True)
    self._run_check_pep(without_key)

    with_key = aux.qvalues(self.psms, key=self.key, pep=self.pep, full_output=True)
    self._run_check_pep(with_key)
def test_qvalues_from_tandem(self):
    """Compare q-values from a plain ``TandemXML`` object and a context-managed one.

    The same file is read twice, once through a directly constructed reader
    and once inside a ``with`` block; the ``'q'`` fields must be close.
    """
    path = 'test.t.xml'
    by_expect = op.itemgetter('expect')

    reader = tandem.TandemXML(path)
    q_plain = aux.qvalues(reader, key=by_expect, is_decoy=tandem.is_decoy)

    with tandem.TandemXML(path) as managed_reader:
        q_managed = aux.qvalues(managed_reader, key=op.itemgetter('expect'), is_decoy=tandem.is_decoy)

    self.assertTrue(np.allclose(q_plain['q'], q_managed['q']))
def test_qvalues(self):
    """Validate ``aux.qvalues`` with ``remove_decoy=True``.

    All ``'q'`` and ``'is decoy'`` values must be close to zero and the
    ``'score'`` field must be close to ``0..25``.
    """
    result = aux.qvalues(self.psms, key=self.key, is_decoy=self.is_decoy, remove_decoy=True)

    for field in ('q', 'is decoy'):
        self.assertTrue(np.allclose(result[field], 0))
    expected_scores = np.arange(26)
    self.assertTrue(np.allclose(result['score'], expected_scores))
def test_qvalues_pep(self):
    """Validate PEP-based ``aux.qvalues`` with and without an explicit ``key``."""
    without_key = aux.qvalues(self.psms, pep=self.pep)
    self._run_check_pep(without_key)

    with_key = aux.qvalues(self.psms, pep=self.pep, key=self.key)
    self._run_check_pep(with_key)
def test_qvalues_with_decoy(self):
    """Validate ``aux.qvalues`` with decoys kept (``remove_decoy=False``).

    The first call passes no ``formula`` and is checked with
    ``self._run_check(q, 2)``; the second passes ``formula=1`` and is checked
    with ``self._run_check(q, 1)``.
    """
    common = dict(key=self.key, is_decoy=self.is_decoy, remove_decoy=False)

    self._run_check(aux.qvalues(self.psms, **common), 2)
    self._run_check(aux.qvalues(self.psms, formula=1, **common), 1)
def test_qvalues_full_output(self):
    """Validate ``aux.qvalues`` with decoys kept and ``full_output=True``."""
    options = dict(
        key=self.key,
        is_decoy=self.is_decoy,
        remove_decoy=False,
        full_output=True,
    )
    result = aux.qvalues(self.psms, **options)
    self._run_check(result, 2)