コード例 #1
0
def extract_deamids(mod_str):
    """Parse the deamidation entries of a comma-separated modification string.

    The string is split on ',', every piece is stripped of surrounding
    whitespace, and only the pieces matched by the module-level `deamid`
    pattern are kept.  Each kept piece is run through
    `ms.parse_spectrum_modifications`.

    Returns a `pd.Series` holding the parsed results in their original order.
    """
    parsed_deamids = []
    for raw_piece in mod_str.split(','):
        candidate = raw_piece.strip()
        # anything the `deamid` pattern does not match is ignored ...
        if deamid.search(candidate):
            parsed_deamids.append(ms.parse_spectrum_modifications(candidate))
    return pd.Series(parsed_deamids)
コード例 #2
0




# Enumerate all tractable peptides from pep_df and, for each one, locate the
# 3-residue windows in the parent protein that start at a modified residue ...
# NOTE(review): glyco_mod is only initialised in this excerpt; presumably it is
# filled further down in the loop body - confirm.
glyco_mod = []
for uniq_pept, pept_pos, prot_sr in pep_df[['pept','peptide_start','prot_seqrec']].itertuples(index=False):
    # NOTE(review): assumes str() of the 'prot_seqrec' value gives the plain
    # sequence; if it is a record object rather than a sequence/str, str() may
    # return a summary instead - confirm.
    prot_seq = str(prot_sr)
    # all spectrum rows that belong to the current peptide ...
    pept_spectrum = spec_info[ spec_info['pept']==uniq_pept ]
    # let's check all present modifications in the flattened-out list of lists ...
    # NOTE(review): each cell is stripped as a whole before splitting on ',', so the
    # individual pieces keep any whitespace that follows a comma (extract_deamids
    # strips every piece) - np.unique below may then keep near-duplicates; confirm.
    modifs = [ mod for cmod in pept_spectrum['Variable modifications identified by spectrum'] for mod in cmod.strip().split(',') ]
    # and get those that are unique (np.unique also sorts them) ...
    modifs = np.unique(modifs)
    # now extract type, position and value for each of them, keeping only the
    # entries matched by the `deamid` pattern ...
    modifs = [ ms.parse_spectrum_modifications(mod) for mod in modifs if bool(deamid.search(mod)) ]
    # now extracting meaningful glycosylation sites ...
    glyco_sites = []
    glyco_start = []
    # looks like the inner loop here is the only place where we do need 0-based indexing switching ...
    for type_aa,gpos_pept,value in modifs:
        # keep only residues coded 'n'/'N' whose value is within 0.1 of 3
        # (presumably the mass shift that marks a glycosylation site - confirm) ...
        if (type_aa in ['n','N']) and (np.abs(value-3)<0.1):
            # 'pept_pos' - is 1-based absolute position of the peptide in the protein ...
            # 'gpos_pept' - is 1-based relative position of gsite_start_N in the peptide ...
            gsite_start = pept_pos + gpos_pept-1 # 1-based coordinate ...
            gsite_stop  = pept_pos + gpos_pept-1 + 3-1 # 1-based coordinate ...
            glyco_start.append(gsite_start)
            # Due to slicing rules, we need [start-1:stop], to have position 'stop' included
            # (the slice covers the 3 residues gsite_start..gsite_stop) ...
            glyco_sites.append(prot_seq[gsite_start-1:gsite_stop])
    ############################################################
    # gstart must be 1-based for output ...