# So far the following merge seems to be 100% sufficient for the desired
# final output; extra feature columns can be added to the selection below
# if they turn out to be needed.
quant_n_raw = quant_info_unrolled[[
    'pept',
    'deamid_info',
    'pept_with_mod',
    'Weight',
    'spec_name',
    'Mascot Ion Score',
    'Mascot Identity Score',
    'Mascot Delta Ion Score',
    # 'prot_ident_probab',
    # NOTE: in case of ambiguity we need more columns to merge on !!!
    'pept_ident_probab',
]].merge(
    bad_info,
    how='right',               # keep every row of bad_info
    on=['pept', 'spec_name'],
    suffixes=('', '_x'),       # clashing bad_info columns get the '_x' suffix
)


#######################################################
# Now, extract those gsites ...
def _fetched_seq(fetchid):
    """Return the sequence of the record stored in `gbrecs` under `fetchid`.

    `gbrecs` is keyed by the string form of the id, hence the `str()`
    conversion of the key; the record's `.seq` is returned as a plain `str`.
    """
    return str(gbrecs[str(fetchid)].seq)


def dg_func(x):
    """Row-wise helper for `DataFrame.apply(..., axis=1)`.

    `x` is one row providing 'deamid_info', 'start_fetched' and 'fetchid'.
    The result of `ms.deamid_to_gsite` is wrapped in a `pd.Series`, so
    `apply` expands it into columns (presumably one per returned item;
    confirm against `ms.deamid_to_gsite`).
    """
    # 'fetchid' is passed through int() before the lookup, so a float-typed
    # id such as 123.0 maps to the key '123' rather than '123.0'.
    sequence = _fetched_seq(int(x['fetchid']))
    return pd.Series(
        ms.deamid_to_gsite(x['deamid_info'], x['start_fetched'], sequence)
    )


# ... and add them back to the main table (aligned on the row index).
gs_res = quant_n_raw[['deamid_info', 'start_fetched', 'fetchid']].apply(
    dg_func, axis=1
)
quant_n_raw = quant_n_raw.merge(gs_res, left_index=True, right_index=True)

# Single-argument parenthesised print produces the same output on Python 2
# and Python 3; print("") is used for the blank line because a bare print()
# would output "()" on Python 2.
print("")
print("Now we'd need to add theoretical glycosilation sites as a separate column ...")
print("full protein sequence and its length is added as well ...")


# This analysis must be done once for each 'fetchid', and then merged back
# to the main table ...
def get_theor_sites_fid(fid):
    """Return `ms.get_theor_sites` evaluated on the sequence of record `fid`."""
    return ms.get_theor_sites(_fetched_seq(fid))


def get_theor_sites_number_fid(fid):
    """Return `ms.get_theor_sites_number` evaluated on the sequence of record `fid`."""
    return ms.get_theor_sites_number(_fetched_seq(fid))