Beispiel #1
0
 def test_filter_iter(self):
     """Filter PSMs supplied as a plain iterator, using a decoy predicate at FDR 0.5.

     The call is made twice: once with the default output mode (result exposes
     ``shape``) and once with ``full_output=False`` (result is used as a context
     manager and iterated). Both are expected to keep 26 PSMs.
     """
     options = dict(key=self.key, is_decoy=self.is_decoy, fdr=0.5)

     # An iterator can be traversed only once, so each call gets a fresh one.
     filtered = aux.filter(iter(self.psms), **options)
     self.assertEqual(filtered.shape[0], 26)

     with aux.filter(iter(self.psms), full_output=False, **options) as stream:
         kept = list(stream)
     self.assertEqual(len(kept), 26)
Beispiel #2
0
 def test_filter_pep_iter(self):
     """Filter PSMs supplied as a plain iterator, using ``pep`` at FDR 0.02.

     Checks both the default output mode and the ``full_output=False`` mode
     (context manager that is iterated); each must keep 21 PSMs.
     """
     options = dict(key=self.key, pep=self.pep, fdr=0.02)

     # An iterator can be traversed only once, so each call gets a fresh one.
     filtered = aux.filter(iter(self.psms), **options)
     self.assertEqual(filtered.shape[0], 21)

     with aux.filter(iter(self.psms), full_output=False, **options) as stream:
         kept = list(stream)
     self.assertEqual(len(kept), 21)
Beispiel #3
0
    def filter_evalue_new(self, FDR=1, FDR2=1, useMP=True, drop_decoy=True, toprint=True):
        """Filter PSMs by e-value and, optionally, by MP score at the given FDR levels.

        Every input file returned by ``self.get_infiles()`` is processed
        separately. Elements yielded by ``self.get_izip_full()`` are indexed as
        ``val[0]`` (an object with ``infile``, ``note``, ``evalue`` and
        ``peptscore`` attributes); the whole element is what gets filtered and
        stored.

        Parameters
        ----------
        FDR : number
            FDR threshold for the e-value filtering, in percent.
        FDR2 : number
            FDR threshold for the MP-score filtering, in percent.
        useMP : bool
            If true, elements whose e-value is above the e-value cut-off are
            additionally filtered by MP score (``peptscore``).
        drop_decoy : bool
            If true, ``filter_decoy()`` is called on the result before returning.
        toprint : bool
            If true, the achieved FDR and the cut-off values are logged.

        Returns
        -------
        tuple
            ``(new_peptides, best_cut_evalue, best_cut_peptscore)``. The two
            cut-offs are those of the last processed input file; when there are
            no input files they are ``0`` and ``1.1`` respectively.
        """
        def isdecoy(x):
            return x[0].note == 'decoy'

        def escore(x):
            return float(x[0].evalue)

        def mscore(x):
            # Negated so that a higher MP score sorts as "better" (lower key).
            return -float(x[0].peptscore)

        new_peptides = self.copy_empty()
        # Defaults keep the return statement valid when there are no input files
        # (previously this raised UnboundLocalError). They match the fallbacks below.
        best_cut_evalue = 0
        best_cut_peptscore = 1.1
        for infile in self.get_infiles():
            infile_peptides = [val for val in self.get_izip_full() if val[0].infile == infile]
            filtered_peptides = aux.filter(infile_peptides, fdr=float(FDR)/100, key=escore, is_decoy=isdecoy, remove_decoy=False, formula=1, full_output=True)
            qvals_e = aux.qvalues(filtered_peptides, key=escore, is_decoy=isdecoy, reverse=False, remove_decoy=False, formula=1, full_output=True)
            try:
                best_cut_evalue = max(escore(p) for p in filtered_peptides)
                real_FDR = round(aux.fdr(filtered_peptides, is_decoy=isdecoy) * 100, 1)
            except Exception:
                # Best effort: e.g. nothing passed the filter. Unlike a bare
                # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
                best_cut_evalue = 0
                real_FDR = 0
            if toprint:
                logger.info('%s %s e-value', real_FDR, best_cut_evalue)
            best_cut_peptscore = 1.1
            qvals_m = ()
            if useMP:
                # Second pass: only elements that did not pass the e-value cut-off.
                tmp_peptides = [p for p in infile_peptides if escore(p) > best_cut_evalue]
                filtered_peptides = aux.filter(tmp_peptides, fdr=float(FDR2)/100, key=mscore, is_decoy=isdecoy, remove_decoy=False, formula=1, full_output=True)
                qvals_m = aux.qvalues(filtered_peptides, key=mscore, is_decoy=isdecoy, reverse=False, remove_decoy=False, formula=1, full_output=True)
                try:
                    best_cut_peptscore = min(float(p[0].peptscore) for p in filtered_peptides)
                    real_FDR = round(aux.fdr(filtered_peptides, is_decoy=isdecoy) * 100, 1)
                except Exception:
                    best_cut_peptscore = 1.1
                    real_FDR = 0
                if toprint:
                    logger.info('%s %s MP score', real_FDR, best_cut_peptscore)
            # In each q-value record the last field is used as the original
            # element and the second-to-last as its q-value.
            for qvals in (qvals_e, qvals_m):
                for val in qvals:
                    val[-1][0].qval = val[-2]
                    new_peptides.add_elem(val[-1])
        new_peptides.check_arrays()
        if drop_decoy:
            new_peptides.filter_decoy()
        return (new_peptides, best_cut_evalue, best_cut_peptscore)
Beispiel #4
0
 def test_filter_pep_array_iter_key_str_is_decoy(self):
     """Filter a structured array with an iterator of keys and a field name for ``pep``.

     The array is built from ``self.psms`` with an extra boolean ``'is decoy'``
     field. The check is done in the default output mode and with
     ``full_output=False``; each must keep 21 PSMs at FDR 0.02.
     """
     # np.bool_ instead of the np.bool alias, which was removed in NumPy 1.24.
     dtype = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64), ('is decoy', np.bool_)]
     psms = np.array([(s, l, p, self.is_decoy((s, l, p))) for s, l, p in self.psms], dtype=dtype)
     # First call: keys come from an iterator over a list.
     key = iter([self.key(psm) for psm in psms])
     f = aux.filter(psms, key=key, pep='pep', fdr=0.02)
     self.assertEqual(f.shape[0], 21)
     # Second call: keys come from a lazy generator; the previous iterator is spent.
     key = iter(self.key(psm) for psm in psms)
     with aux.filter(psms, key=key, pep='pep', fdr=0.02, full_output=False) as f:
         f1 = list(f)
     self.assertEqual(len(f1), 21)
Beispiel #5
0
 def test_filter_pep_array_gen_key(self):
     """Filter a structured array when the key is given as a generator.

     Checks the default output mode and the ``full_output=False`` mode; each
     must keep 21 PSMs at FDR 0.02.
     """
     record_type = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
     records = np.array(self.psms, dtype=record_type)

     def fresh_keys():
         # A generator is single-use, so every call to aux.filter gets its own.
         return (self.key(rec) for rec in records)

     filtered = aux.filter(records, key=fresh_keys(), pep=self.pep, fdr=0.02)
     self.assertEqual(filtered.shape[0], 21)

     with aux.filter(records, key=fresh_keys(), pep=self.pep, fdr=0.02, full_output=False) as stream:
         kept = list(stream)
     self.assertEqual(len(kept), 21)
Beispiel #6
0
 def test_filter_pep_two_iters(self):
     """Filter PSMs passed as two iterators (a random split of ``self.psms``) using ``pep``.

     Checks the default output mode and the ``full_output=False`` mode; each
     must keep 21 PSMs at FDR 0.02.
     """
     split = np.random.randint(1, len(self.psms)-1)

     def halves():
         # Fresh iterators over both parts; iterators are single-use.
         return iter(self.psms[:split]), iter(self.psms[split:])

     options = dict(key=self.key, pep=self.pep, fdr=0.02)

     first, second = halves()
     filtered = aux.filter(first, second, **options)
     self.assertEqual(filtered.shape[0], 21)

     first, second = halves()
     with aux.filter(first, second, full_output=False, **options) as stream:
         kept = list(stream)
     self.assertEqual(len(kept), 21)
Beispiel #7
0
 def test_filter_two_iters(self):
     """Filter PSMs passed as two iterators (a random split of ``self.psms``) with a decoy predicate.

     Checks the default output mode and the ``full_output=False`` mode; each
     must keep 26 PSMs at FDR 0.5.
     """
     split = np.random.randint(1, len(self.psms)-1)

     def halves():
         # Fresh iterators over both parts; iterators are single-use.
         return iter(self.psms[:split]), iter(self.psms[split:])

     options = dict(key=self.key, is_decoy=self.is_decoy, fdr=0.5)

     first, second = halves()
     filtered = aux.filter(first, second, **options)
     self.assertEqual(filtered.shape[0], 26)

     first, second = halves()
     with aux.filter(first, second, full_output=False, **options) as stream:
         kept = list(stream)
     self.assertEqual(len(kept), 26)
Beispiel #8
0
 def test_filter_pep_two_dataframes_str_key_iter_pep(self):
     """Filter two DataFrames with a column name as key and an iterator for ``pep``.

     A structured array built from ``self.psms`` is split at a random index
     into two DataFrames, while ``pep`` iterates over the whole array's
     ``'pep'`` field. The check is done in the default output mode and with
     ``full_output=False``; each must keep 21 PSMs at FDR 0.02.
     """
     i = np.random.randint(1, len(self.psms)-1)
     # np.bool_ instead of the np.bool alias, which was removed in NumPy 1.24.
     dtype = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64), ('is decoy', np.bool_)]
     psms = np.array([(s, l, p, self.is_decoy((s, l, p))) for s, l, p in self.psms], dtype=dtype)
     pep = iter(psms['pep'])
     psms1 = pd.DataFrame(psms[:i])
     psms2 = pd.DataFrame(psms[i:])
     f = aux.filter(psms1, psms2, key='score', pep=pep, fdr=0.02)
     self.assertEqual(f.shape[0], 21)
     # The first iterator is spent; create a new one for the second call.
     pep = iter(psms['pep'])
     with aux.filter(psms1, psms2, key='score', pep=pep, fdr=0.02, full_output=False) as f:
         f1 = list(f)
     self.assertEqual(len(f1), 21)
Beispiel #9
0
    def _run_check(self, *args, **kwargs):
        """Run ``aux.filter`` on ``args`` at FDR 0.5 with four option sets and check the counts.

        ``key`` and ``is_decoy`` are taken from ``kwargs`` when present, otherwise
        from ``self.key`` / ``self.is_decoy``. Each option set is checked both in
        the default output mode (via ``shape[0]``) and with ``full_output=False``
        (context manager, counted by iteration).
        """
        common = dict(
            key=kwargs.get('key', self.key),
            is_decoy=kwargs.get('is_decoy', self.is_decoy),
            fdr=0.5,
        )
        # (extra keyword arguments, expected number of retained PSMs)
        cases = (
            ({}, 26),
            ({'formula': 2}, 26),
            ({'remove_decoy': False, 'formula': 1}, 39),
            ({'remove_decoy': False}, 34),
        )

        # All filtering calls are made before any assertion, as in a flat test.
        full_results = [aux.filter(*args, **dict(common, **extra)) for extra, _ in cases]
        for result, (_, expected) in zip(full_results, cases):
            self.assertEqual(result.shape[0], expected)

        streamed = []
        for extra, _ in cases:
            with aux.filter(*args, full_output=False, **dict(common, **extra)) as stream:
                streamed.append(list(stream))

        for items, (_, expected) in zip(streamed, cases):
            self.assertEqual(len(items), expected)
Beispiel #10
0
    def _run_check_pep(self, *args, **kwargs):
        """Run ``aux.filter`` on ``args`` at FDR 0.02, with and without a key, and expect 21 PSMs.

        ``key`` is removed from ``kwargs`` (falling back to ``self.key``); the
        remaining ``kwargs`` are forwarded unchanged. Both variants are checked in
        the default output mode and with ``full_output=False``.
        """
        score = kwargs.pop('key', self.key)
        # First variant passes the key explicitly, second one omits it.
        variants = (dict(kwargs, key=score), dict(kwargs))

        full_results = [aux.filter(*args, fdr=0.02, **options) for options in variants]
        for result in full_results:
            self.assertEqual(result.shape[0], 21)

        streamed = []
        for options in variants:
            with aux.filter(*args, fdr=0.02, full_output=False, **options) as stream:
                streamed.append(list(stream))

        for items in streamed:
            self.assertEqual(len(items), 21)
Beispiel #11
0
def get_subset(results, settings, fdr=0.01):
    """Filter ``results`` to the given FDR using only the top candidate of each result.

    The score is the first entry of ``result['e-values']``; a result counts as
    a decoy when the third field of its first candidate equals ``'d'``.
    ``settings`` is accepted but not used here.
    """
    def top_evalue(result):
        return result['e-values'][0]

    def top_is_decoy(result):
        return result['candidates'][0][2] == 'd'

    return aux.filter(results, key=top_evalue, is_decoy=top_is_decoy, fdr=fdr)
Beispiel #12
0
 def test_filter_empty_dataframe_str_key_str_is_decoy(self):
     """An empty DataFrame addressed by column names yields empty results.

     Both ``aux.filter`` and ``aux.qvalues`` are expected to return an object
     with zero rows.
     """
     empty = pd.DataFrame({'score': [], 'is decoy': []})
     columns = dict(key='score', is_decoy='is decoy')

     filtered = aux.filter(empty, fdr=0.1, **columns)
     self.assertEqual(filtered.shape[0], 0)

     scored = aux.qvalues(empty, remove_decoy=False, formula=1, full_output=True, fdr=0.01, **columns)
     self.assertEqual(scored.shape[0], 0)
Beispiel #13
0
 def test_filter_empty_dataframe(self):
     """An empty DataFrame with callable key and decoy predicate yields empty results.

     The DataFrame is built from an empty structured array. Both ``aux.filter``
     and ``aux.qvalues`` are expected to return an object with zero rows.
     """
     record_type = [('score', np.int8), ('label', np.str_, 1), ('pep', np.float64)]
     empty = pd.DataFrame(np.array([], dtype=record_type))
     shared = dict(key=self.key, is_decoy=self.is_decoy, remove_decoy=False, formula=1, fdr=0.1)

     filtered = aux.filter(empty, **shared)
     self.assertEqual(filtered.shape[0], 0)

     scored = aux.qvalues(empty, full_output=True, **shared)
     self.assertEqual(scored.shape[0], 0)
Beispiel #14
0
def show_info(args):
    """Read PSMs from a pepXML file, filter them by FDR and log the counts.

    Uses ``args.file``, ``args.decoy_prefix``, ``args.fdr`` and ``args.format``.
    ``args.format`` is the logging format string; it receives the file name,
    the total number of PSMs and the size of the filtered result.
    """
    def decoy_check(psm):
        return pepxml.is_decoy(psm, args.decoy_prefix)

    with pepxml.PepXML(args.file) as reader:
        all_psms = list(reader)
        passed = aux.filter(all_psms, is_decoy=decoy_check, fdr=args.fdr, key=pepxml._key)
        logger.info(args.format, args.file, len(all_psms), passed.size)
Beispiel #15
0
def filter_evalue_prots(prots, FDR=1.0, remove_decoy=True, dec_prefix='DECOY_'):
    """Filter a protein mapping by the ``'expect'`` value at the given FDR (in percent).

    ``prots`` maps a protein name to a dict holding at least ``'expect'``.
    A protein is a decoy when its name starts with ``dec_prefix``. Each
    protein that passes gets a ``'qval'`` entry written into its dict (the
    input dicts are modified in place). Decoys are left out of the returned
    mapping when ``remove_decoy`` is true. The achieved FDR is logged.

    Returns a new dict of name -> protein dict.
    """
    def isdecoy(entry):
        return entry[0].startswith(dec_prefix)

    def escore(entry):
        return float(entry[1]['expect'])

    shared = dict(key=escore, is_decoy=isdecoy, remove_decoy=False, formula=1, full_output=True)
    passing = aux.filter(prots.items(), fdr=float(FDR) / 100, **shared)
    scored = aux.qvalues(passing, reverse=False, **shared)

    kept = {}
    for record in scored:
        # Last field is used as the original (name, data) pair, the one before it as the q-value.
        entry = record[-1]
        entry[1]['qval'] = record[-2]
        if remove_decoy and entry[0].startswith(dec_prefix):
            continue
        kept[entry[0]] = entry[1]

    logger.info('Actual protein-level FDR = %.2f%%', aux.fdr(passing, is_decoy=isdecoy) * 100)
    return kept
Beispiel #16
0
    key=lambda x: x['SpectrumIdentificationItem'][0]['MS-GF:EValue'],
    fdr=0.01)
# Histogram of charge states taken from the first SpectrumIdentificationItem
# of every entry in `msgf` (defined earlier in the script).
pylab.figure()
charge_states = [psm['SpectrumIdentificationItem'][0]['chargeState'] for psm in msgf]
pylab.hist(charge_states, bins=np.arange(5), align='left')
pylab.xticks(np.linspace(0, 4, 5))
pylab.xlabel('charge state')

# Load the two tabular result files.
morpheus = pd.read_table('example.PSMs.tsv')
amanda = pd.read_table('example_output.csv', skiprows=1)

# Filter the Morpheus table at FDR 0.01; reverse=True is passed together with
# the 'Morpheus Score' key, and decoys are flagged by the 'Decoy?' column.
morph_filt = aux.filter(
    morpheus,
    fdr=0.01,
    key='Morpheus Score',
    reverse=True,
    is_decoy='Decoy?',
)

pylab.figure()
morph_filt.plot(
    x='Retention Time (minutes)',
    y='Precursor Mass (Da)',
    kind='scatter',
)

# A row is a decoy only if every ';'-separated accession starts with 'DECOY'.
decoy_flags = []
for prot in amanda['Protein Accessions']:
    decoy_flags.append(all(s.startswith('DECOY') for s in prot.split(';')))
amanda['isDecoy'] = decoy_flags
amanda_filt = aux.filter(amanda[amanda['Rank'] == 1],
                         key='Weighted Probability',
                         is_decoy='isDecoy',