Ejemplo n.º 1
0
def prot_peptides(prot_seq,
                  enzyme,
                  mc,
                  minlen,
                  maxlen,
                  is_decoy,
                  dont_use_seen_peptides=False):
    """Yield cleavage products of ``prot_seq`` that pass length and validity filters.

    The protein is cleaved with ``parser.cleave(prot_seq, enzyme, mc)``. A
    peptide is considered only when ``minlen <= len(peptide) <= maxlen`` and
    when at least one of the following holds: the whole protein passed
    ``parser.fast_valid``, the peptide is already recorded in ``seen_target``
    or ``seen_decoy``, or the peptide itself passes ``parser.fast_valid``.

    When ``dont_use_seen_peptides`` is true every accepted peptide is yielded
    and the seen-collections are left untouched. Otherwise a peptide is
    yielded only if it is in neither collection, and it is added to
    ``seen_decoy`` (``is_decoy`` true) or ``seen_target`` before being yielded.

    ``seen_target`` and ``seen_decoy`` are names defined outside this function.
    """
    # If the full protein validates, the per-peptide check is short-circuited.
    whole_protein_valid = parser.fast_valid(prot_seq)

    for candidate in parser.cleave(prot_seq, enzyme, mc):
        if not minlen <= len(candidate) <= maxlen:
            continue

        acceptable = (whole_protein_valid
                      or candidate in seen_target
                      or candidate in seen_decoy
                      or parser.fast_valid(candidate))
        if not acceptable:
            continue

        if dont_use_seen_peptides:
            yield candidate
            continue

        if candidate in seen_target or candidate in seen_decoy:
            continue

        # Record the peptide before handing it out so duplicates are dropped.
        registry = seen_decoy if is_decoy else seen_target
        registry.add(candidate)
        yield candidate
Ejemplo n.º 2
0
 def test_fast_valid(self):
     """Random peptides over ``self.labels`` validate; 'Z'-substituted ones do not.

     Runs 50 trials. Each trial builds a peptide of 1-10 labels drawn from
     ``self.labels`` and checks ``parser.fast_valid`` / ``parser.valid``
     accept it, then replaces each distinct residue with 'Z' in turn and
     checks both validators reject the result.
     """
     for _trial in range(50):
         length = random.randint(1, 10)
         peptide = ''.join(
             random.choice(self.labels) for _ in range(length))

         self.assertTrue(parser.fast_valid(peptide, labels=self.labels))
         self.assertTrue(parser.valid(peptide, labels=self.labels))
         self.assertTrue(parser.valid(peptide))

         for residue in set(peptide):
             corrupted = peptide.replace(residue, 'Z')
             for validator in (parser.fast_valid, parser.valid):
                 self.assertFalse(validator(corrupted, labels=self.labels))
Ejemplo n.º 3
0
def fraction_of_by(peptide_seq, precursor_mz, precursor_charge, mz, intensity):
    """Return the fraction of spectrum intensity annotated as b/y fragments.

    Builds a ``sus.MsmsSpectrum`` for ``peptide_seq``, restricts it to the
    100-1400 m/z range, removes the precursor peak and annotates b/y peptide
    fragments using a 50 ppm tolerance. The result is the summed intensity of
    annotated peaks divided by the summed intensity of all remaining peaks.

    Args:
        peptide_seq: Peptide sequence; also used as the spectrum identifier.
        precursor_mz: Precursor m/z passed to ``MsmsSpectrum``.
        precursor_charge: Precursor charge passed to ``MsmsSpectrum``.
        mz: Peak m/z values.
        intensity: Peak intensities.

    Returns:
        The annotated-intensity fraction, or ``0.0`` when the sequence fails
        ``parser.fast_valid`` (a message is written to stderr) or when the
        total intensity is not positive.
    """
    if not parser.fast_valid(peptide_seq):
        print("Invalid peptide sequence encountered", file=sys.stderr)
        return 0.0

    # The original bound this object to `spec` but then read `spectrum`,
    # which raised UnboundLocalError for every valid peptide.
    spectrum = sus.MsmsSpectrum(peptide_seq,
                                precursor_mz=precursor_mz,
                                precursor_charge=precursor_charge,
                                mz=mz,
                                intensity=intensity,
                                peptide=peptide_seq)
    fragment_tol_mass = 50
    fragment_tol_mode = 'ppm'
    spectrum = (spectrum
                .set_mz_range(min_mz=100, max_mz=1400)
                .remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
                .annotate_peptide_fragments(fragment_tol_mass,
                                            fragment_tol_mode,
                                            ion_types='by'))

    current, by_current = 0., 0.
    for peak_intensity, annotation in zip(spectrum.intensity,
                                          spectrum.annotation):
        current += peak_intensity
        if annotation is not None:
            by_current += peak_intensity

    if current > 0.:
        return by_current / current
    return 0.0
Ejemplo n.º 4
0
def prot_peptides(prot_seq, enzyme, mc, minlen, maxlen, is_decoy, dont_use_seen_peptides=False, snp=False, desc=False, position=False, semitryptic=False):
    """Generate peptides of ``prot_seq``, optionally with SNP handling.

    Peptides come from ``get_peptides(prot_seq, enzyme, mc, minlen, maxlen,
    semitryptic)``, which is expected to produce ``(peptide, startposition,
    length)`` triples. For each peptide that is not yet in ``seen_target`` /
    ``seen_decoy`` and that is valid (either the whole protein passed
    ``parser.fast_valid`` or the peptide itself does), the peptide is
    emitted. If the protein starts with 'M' and the peptide starts at
    position 0, variants with the first one or two residues removed are also
    emitted when ``minlen`` permits.

    Args:
        prot_seq: Protein sequence. Must be non-empty: ``prot_seq[0]`` is
            read unconditionally.
        enzyme, mc, minlen, maxlen, semitryptic: Forwarded to
            ``get_peptides``; ``minlen`` is also used for the trimmed
            N-terminal variants.
        is_decoy: Selects ``seen_decoy`` (true) or ``seen_target`` (false)
            as the collection that newly emitted peptides are added to.
        dont_use_seen_peptides: When true, emitted peptides are not checked
            against or added to the seen-collections inside the emit loop.
        snp: ``1`` expands each peptide through ``custom_snp``; ``2`` uses
            the variant parsed from ``desc``; any other value disables SNP
            handling.
        desc: For ``snp == 2``, a description whose first space-separated
            token has ``|``-separated fields; field 1 is parsed as a 1-based
            position and field 2 as the change string.
        position: When true, yield ``(peptide, startposition)`` tuples
            instead of bare peptides.

    Yields:
        Peptide strings, or ``(peptide, startposition)`` tuples when
        ``position`` is true.

    ``seen_target``, ``seen_decoy``, ``get_peptides`` and ``custom_snp`` are
    names defined outside this function.
    """

    # If the whole protein validates, per-peptide validation is skipped below.
    dont_use_fast_valid = parser.fast_valid(prot_seq)
    methionine_check = prot_seq[0] == 'M'
    if snp == 2:
        if desc:
            try:
                tmp = desc.split(' ')[0].split('|')
                pos = int(tmp[1]) - 1  # convert the 1-based position to 0-based
                aach = tmp[2]
            # NOTE(review): bare except hides every error (including
            # KeyboardInterrupt); any failure simply disables desc-based SNPs.
            except:
                desc = False
    # peptides = cparser._cleave(prot_seq, enzyme, mc)
    # for pep, startposition in peptides:
    #     plen = len(pep)
    for pep, startposition, plen in get_peptides(prot_seq, enzyme, mc, minlen, maxlen, semitryptic):
        # loopcnt is the number of variants to emit: variant k is pep[k-1:],
        # so 3 -> pep[2:], 2 -> pep[1:], 1 -> pep itself (emitted in that order).
        loopcnt = 0
        # NOTE(review): a peptide already in a seen-collection is skipped here
        # even when dont_use_seen_peptides is true -- confirm that is intended.
        if pep not in seen_target and pep not in seen_decoy and (dont_use_fast_valid or parser.fast_valid(pep)):
            loopcnt = 1
            if methionine_check and startposition == 0:
                if minlen <= plen - 2:
                    loopcnt = 3
                elif minlen <= plen - 1:
                    loopcnt = 2
        while loopcnt:
            f = pep[loopcnt-1:]
            if dont_use_seen_peptides:
                if snp == 1:
                    for ff, seq_new in custom_snp(f, startposition):
                        # NOTE(review): both branches are identical, so
                        # seq_new has no effect on this path.
                        if not seq_new:
                            yield ff if not position else (ff, startposition)
                        else:
                            yield ff if not position else (ff, startposition)
                else:
                    yield f if not position else (f, startposition)
            else:
                if f not in seen_target and f not in seen_decoy:
                    # Record the variant before yielding so repeats are dropped.
                    if is_decoy:
                        seen_decoy.add(f)
                    else:
                        seen_target.add(f)
                    if snp == 1:
                        for ff, seq_new in custom_snp(f, startposition):
                            # NOTE(review): these are two independent `if`s, so
                            # a falsy seq_new that is in neither collection
                            # yields ff twice -- possibly meant to be `elif`.
                            if not seq_new:
                                yield ff if not position else (ff, startposition)
                            if seq_new not in seen_decoy and seq_new not in seen_target:
                                yield ff if not position else (ff, startposition)
                    elif snp == 2:
                        # When the variant position falls inside this peptide,
                        # the peptide is yielded only in marked-up form, and
                        # only if aach passes the check below; otherwise
                        # nothing is yielded for it.
                        if desc and startposition <= pos <= startposition + plen:
                            if len(aach) == 3 and aach[0] in parser.std_amino_acids and aach[2] in parser.std_amino_acids:
                                # NOTE(review): pos_diff is relative to the
                                # untrimmed peptide, but f may have lost 1-2
                                # leading residues (loopcnt > 1) -- verify the
                                # offset for trimmed variants.
                                pos_diff = pos - startposition
                                # Replace the residue at pos_diff with an
                                # inline 'snp<from>to<to>at<pos>snp' marker.
                                f = f[:pos_diff] + 'snp%sto%sat%ssnp' % (aach.split('>')[0], aach.split('>')[-1], pos) + f[pos_diff+1:]
                                yield f if not position else (f, startposition)
                        else:
                            yield f if not position else (f, startposition)
                    else:
                        yield f if not position else (f, startposition)
            loopcnt -= 1
Ejemplo n.º 5
0
def fraction_of_by_seq(peptide_seq, precursor_mz, precursor_charge, mz,
                       intensity):
    """Build a spectrum for ``peptide_seq`` and return ``fraction_of_by`` of it.

    When the sequence fails ``parser.fast_valid`` a message is printed to
    stderr and ``0.0`` is returned. Otherwise a ``sus.MsmsSpectrum`` is
    constructed from the arguments (the sequence doubles as the identifier
    and the ``peptide`` value) and passed to ``fraction_of_by``.
    """
    if parser.fast_valid(peptide_seq):
        spectrum_kwargs = dict(precursor_mz=precursor_mz,
                               precursor_charge=precursor_charge,
                               mz=mz,
                               intensity=intensity,
                               peptide=peptide_seq)
        return fraction_of_by(sus.MsmsSpectrum(peptide_seq, **spectrum_kwargs))

    print("Invalid peptide sequence encountered", file=sys.stderr)
    return 0.0
Ejemplo n.º 6
0
 def test_valid(self):
     """Isoforms of random peptides validate; 'Z'-substituted ones do not.

     Runs 50 trials. Each trial builds a peptide of 1-10 labels from
     ``self.labels``, requests its isoforms from ``parser.isoforms`` with
     ``self.potential`` as variable and ``self.constant`` as fixed
     modifications, and checks that:

     * the peptide prefixed with 'H-' is rejected under ``self.labels``;
     * every isoform is accepted under ``self.extlabels``;
     * replacing any residue of the peptide with 'Z' in an isoform makes
       both ``parser.fast_valid`` and ``parser.valid`` reject it.
     """
     for _trial in range(50):
         length = random.randint(1, 10)
         peptide = ''.join(
             random.choice(self.labels) for _ in range(length))
         isoform_iter = parser.isoforms(
             peptide,
             variable_mods=self.potential,
             fixed_mods=self.constant,
             labels=self.labels)

         with_nterm = 'H-' + peptide
         self.assertFalse(parser.valid(with_nterm, labels=self.labels))

         residues = set(peptide)
         for isoform in isoform_iter:
             self.assertTrue(parser.valid(isoform, labels=self.extlabels))
             for residue in residues:
                 corrupted = isoform.replace(residue, 'Z')
                 for validator in (parser.fast_valid, parser.valid):
                     self.assertFalse(
                         validator(corrupted, labels=self.labels))
Ejemplo n.º 7
0
def rt_filtering(results, settings):
    """Fit retention coefficients to ``results`` and install an RT filter.

    Works on a copy of ``settings``. For every result the experimental RT
    (``utils.get_RT(res['spectrum'])``) and the first candidate's sequence
    (``res['candidates'][0][1]``) are collected. Retention coefficients are
    fitted with ``utils.get_RCs_vary_lcp``; residues missing from the fit are
    filled in by a linear regression between the default coefficients
    (``achrom.RCs_gilar_rp``) and the fitted ones. The 0.05 and 99.95
    percentiles of ``experimental - theoretical`` RT become the accepted
    window, and a closure enforcing that window is stored under
    ``('scoring', 'condition')``.

    Args:
        results: Iterable of dicts with ``'spectrum'`` and ``'candidates'``
            entries. Must be non-empty (the ``zip(*...)`` unpacking needs at
            least one result).
        settings: Settings object providing ``copy``, ``has_option``, ``get``
            and ``set``. If it has a ``('misc', 'legend')`` option, that
            legend is used to expand single-character modification labels.

    Returns:
        The modified copy of ``settings``. If all experimental RTs are close
        to zero, a warning is logged and the copy is returned without a
        condition being set.
    """
    settings = settings.copy()
    if settings.has_option('misc', 'legend'):
        legend = settings.get('misc', 'legend')
    else:
        legend = None
    RTexp, seqs = zip(*[(utils.get_RT(res['spectrum']),
                         res['candidates'][0][1]) for res in results])
    if legend is not None:
        stdl = set(parser.std_labels)
        newseqs = []
        for s in seqs:
            if parser.fast_valid(s):
                newseqs.append(list(s))
                continue
            seq = []
            # Dedicated flag names: the original reused `c` both as the
            # C-terminus flag and as the loop variable, so the flag was
            # overwritten on every iteration.
            nterm_modified = False
            cterm_modified = False
            for label in s:
                if label in stdl:
                    seq.append(label)
                    continue
                mod, residue, term = legend[label]
                if residue != '-':
                    seq.append(mod + residue)
                elif term == '[':
                    seq.append(mod + '-')
                    nterm_modified = True
                else:
                    seq.append('-' + mod)
                    cterm_modified = True
            # Add the standard terminal groups once per sequence, after all
            # labels were inspected. The original did this inside the
            # per-label loop, which appended the N-terminus once per residue
            # and never appended the C-terminus.
            # NOTE(review): the groups are placed first/last, as in a parsed
            # sequence with explicit termini -- confirm downstream code does
            # not rely on a different position.
            if not nterm_modified:
                seq.insert(0, parser.std_nterm)
            if not cterm_modified:
                seq.append(parser.std_cterm)
            newseqs.append(seq)
        seqs = newseqs
    RTexp = [float(x) for x in RTexp]
    if np.allclose(RTexp, 0):
        logger.warning('RT is missing. Skipping RT optimization.')
        return settings
    RC_def = achrom.RCs_gilar_rp
    # xdict maps residue -> [default RC, fitted RC or None].
    xdict = {}
    for key, val in RC_def['aa'].items():
        xdict[key] = [val, None]
    RC_dict = utils.get_RCs_vary_lcp(seqs, RTexp)
    RC_dict_new = dict()
    for key, val in RC_dict['aa'].items():
        xdict.setdefault(key, [val, None])[1] = val
    # Regress fitted coefficients on defaults using residues that have both.
    a, b, _, _ = aux.linear_regression(
        [x[0] for x in xdict.values() if x[1] is not None],
        [x[1] for x in xdict.values() if x[1] is not None])
    for key, x in xdict.items():
        if x[1] is None:
            # No fitted value: predict it from the default coefficient.
            x[1] = x[0] * a + b
        RC_dict_new[key] = x[1]
    if legend is not None:
        for k, v in legend.items():
            if len(k) == 1: continue
            if k[-1] in '[]':
                if k[-2] == '-':
                    kk = ('-' + k[1:-1]) if k[-1] == ']' else (k[:-1])
                else:
                    kk = k[:-1]
            elif len(k) > 1:
                kk = k
            logger.debug('%s -> %s', k, kk)
            if kk in RC_dict_new:
                RC_dict_new[v] = RC_dict_new[kk]
            else:
                # Reset per key so that an unmatched key falls back to 0
                # instead of raising UnboundLocalError or silently reusing
                # the previous iteration's fallback label.
                kkk = None
                if kk[-1].isupper():
                    kkk = kk[-1]
                elif kk[-1] == '-':
                    kkk = parser.std_nterm
                elif kk[0] == '-':
                    kkk = parser.std_cterm
                RC_dict_new[v] = RC_dict_new.get(kkk, 0)
                logger.info('No RC for %s, using %s or 0: %s', kk, kkk,
                            RC_dict_new[v])

    RC_dict['aa'] = RC_dict_new

    logger.debug('RC dict: %s', RC_dict)
    rtexp = np.array([np.mean(x) for x in RTexp])
    rttheor = np.array(
        [calculate_RT(pep, RC_dict, raise_no_mod=False) for pep in seqs])
    deltaRT = rtexp - rttheor
    logger.debug('Linear regression: %s',
                 aux.linear_regression(rtexp, rttheor))
    best_RT_l = scoreatpercentile(deltaRT, 0.05)
    best_RT_r = scoreatpercentile(deltaRT, 99.95)

    def condition(spectrum, cand, _, stored_value=False):
        """Return ``(within_window, theoretical_RT)`` for a candidate.

        ``stored_value`` lets the caller pass back a previously computed
        theoretical RT; a falsy value triggers recomputation.
        """
        if not stored_value:
            stored_value = calculate_RT(cand, RC_dict)
        rtd = spectrum['RT'] - stored_value
        return best_RT_l <= rtd <= best_RT_r, stored_value

    settings.set('scoring', 'condition', condition)
    return settings