# Now, extract those gsites ...
def _protein_seq(fetchid):
    """Return the protein sequence stored in `gbrecs` for `fetchid` as a plain str.

    `gbrecs` is indexed by string ids and its entries expose a `.seq`
    attribute. The id is normalized through `str(int(...))` for every lookup,
    so that an id arriving as a float (e.g. 12345.0 -- TODO confirm whether
    this can actually happen in 'fetchid') resolves to the same entry as its
    integer form.
    """
    return str(gbrecs[str(int(fetchid))].seq)


def dg_func(x):
    """Map one row (deamid_info, start_fetched, fetchid) to its gsite info.

    The result of `ms.deamid_to_gsite` is wrapped in a `pd.Series`, so that a
    row-wise `apply` expands it into columns (presumably one column per
    returned field -- verify against `ms.deamid_to_gsite`).
    """
    return pd.Series(
        ms.deamid_to_gsite(
            x['deamid_info'],
            x['start_fetched'],
            _protein_seq(x['fetchid']),
        )
    )


# and add them back to the main table ...
gs_res = quant_n_raw[['deamid_info', 'start_fetched', 'fetchid']].apply(dg_func, axis=1)
# index-on-index merge: gs_res rows carry the index of the rows they came from.
quant_n_raw = quant_n_raw.merge(gs_res, left_index=True, right_index=True)

# Single-argument print(...) produces identical output on Python 2 and also
# runs on Python 3; print("") stands in for the bare `print` (empty line).
print("")
print("Now we'd need to add theoretical glycosilation sites as a separate column ...")
print("full protein sequence and its length is added as well ...")


# this analysis must be done, once for each 'fetchid', and then merged back to the main table ...
def get_theor_sites_fid(fid):
    """Return `ms.get_theor_sites` evaluated on the protein sequence of `fid`."""
    return ms.get_theor_sites(_protein_seq(fid))


def get_theor_sites_number_fid(fid):
    """Return `ms.get_theor_sites_number` evaluated on the protein sequence of `fid`."""
    return ms.get_theor_sites_number(_protein_seq(fid))


def theor_sites_info(fid):
    """Build the per-protein record that is merged back on 'fetchid'.

    Fields: the original `fid` (kept untouched so it still matches the merge
    key), the predicted sites and their number as returned by `ms`, the full
    protein sequence and its length.
    """
    # Fetch and stringify the sequence once instead of once per field.
    prot_seq = _protein_seq(fid)
    return pd.Series({
        'fetchid': fid,
        'gsites_predicted': ms.get_theor_sites(prot_seq),
        'gsites_predicted_number': ms.get_theor_sites_number(prot_seq),
        'prot_seq': prot_seq,
        'prot_len': len(prot_seq),
    })


###################################################
# One record per distinct fetchid; apply() with a Series-returning function
# yields a table with one column per record field.
predicted_gsite_info = quant_n_raw['fetchid'].drop_duplicates().apply(theor_sites_info)
# add back to the main table ...
# NOTE(review): how='right' keeps every key of predicted_gsite_info; since those
# keys are derived from quant_n_raw itself, no rows should be lost, but the row
# order of the result may follow the right-hand table -- confirm this is intended.
quant_n_raw = quant_n_raw.merge(predicted_gsite_info, on='fetchid', how='right')
print("done ...")
print("numbering appears to be 1-based and overall correct!")
print("")