# Example #1
# 0
# Load the precomputed inputs: important-feature index lists, protein
# descriptor tables, and the two pre-trained random-forest classifiers.
_imp_dir = home_folder + 'imp_files/'
_clf_dir = home_folder + 'clf_files/'

imp_feat_lig3 = np.genfromtxt(_imp_dir + 'imp_feat_lig3.txt', dtype=int)
imp_feat_lig2 = np.genfromtxt(_imp_dir + 'imp_feat_lig2.txt', dtype=int)

prodes3 = np.genfromtxt(_imp_dir + 'prodes3.txt')
prodes2 = np.genfromtxt(_imp_dir + 'prodes2.txt')

# classifiers
# NOTE(review): clfRF3 loads "clfRF.pkl" (no "3" suffix) while clfRF2 loads
# "clfRF2.pkl" — confirm the filename asymmetry is intentional.
clfRF3 = joblib.load(_clf_dir + "clfRF.pkl")
clfRF2 = joblib.load(_clf_dir + "clfRF2.pkl")

# Deduplicate the UniProt accession list, keeping the position of each
# accession's first occurrence, then slice the matching descriptor rows.
#
# BUG FIX: the original iterated `set(uniprotTP)`, whose iteration order is
# arbitrary, so the order of uniT / idx — and therefore the row order of
# pd2 — was nondeterministic across runs; calling `uniprotTP.index(i)`
# inside the loop was also O(n^2). A single ordered pass fixes both while
# still pairing each unique accession with its first index.
uniT = []      # unique accessions, in first-occurrence order
temp = []      # index of each accession's first occurrence in uniprotTP
_seen = set()
for _pos, _acc in enumerate(uniprotTP):
   if _acc not in _seen:
      _seen.add(_acc)
      uniT.append(_acc)
      temp.append(_pos)

idx = np.asarray(temp)
pd2 = prodes2[idx]		#(1473, 35)

# Nested score accumulators: r[key_a][key_b] auto-initialises to 0.0 on
# first access, so callers can `+=` without checking for missing keys.
from collections import defaultdict

def _fresh_scores():
   """Inner mapping that yields 0.0 for any unseen key."""
   return defaultdict(float)

r2 = defaultdict(_fresh_scores)
r3 = defaultdict(_fresh_scores)

from datetime import datetime
# print("Your job started at", datetime.now())
t1 = datetime.now()  # wall-clock start, for timing the molecule loop below

# Iterate over the parsed molecules (presumably an RDKit Mol iterable from an
# SDF supplier defined earlier — confirm upstream). Loop body continues
# beyond this chunk.
for m in mols:
   #lig = m.GetProp('DATABASE_ID')
   # '_Name' is the molecule title line of the record; used here as the ligand ID.
   lig = m.GetProp('_Name')
# Example #2
# 0
# MultiIndex demo: build a hierarchical row index, then stack/unstack it.
# multi_idx,convert to cols, pivot table: xls_tbl1,graphically set up summary_tbl2="pivot table" (it pivots/rotates following chg in graph setup)
d5=pd.DataFrame({'A':['one','one','two','three']*3,'B':['a','b','c']*4,'C':['foo','foo','foo','bar','bar','bar']*2,'D':np.random.randn(12),'E':np.random.randn(12)})
tp1=zip(*[['bar','bar','baz','baz','foo','foo','qux','qux'],['one','two','one','two','one','two','one','two']])
# NOTE(review): `randn` below is assumed to be `numpy.random.randn` imported elsewhere — confirm.
ix1=pd.MultiIndex.from_tuples(tp1,names=['first','second']); d5=pd.DataFrame(randn(8,4),index=ix1,columns=['A','B','C','D']); d6=d5[:4]; d7=d5.stack(); d7.unstack(); # multi-idx can be converted to cols
###? result.columns.levels # labels for multi-index; multi-ix order matters
# NOTE(review): pivot_table's rows=/cols= keywords were removed long ago — modern pandas uses index=/columns=.
# pd.pivot_table(d5,values='D',rows=['A','B'],cols=['C']) # summary_table - grp by A,B,C

# concat df — row-wise and column-wise concatenation demos.
# NOTE(review): these lines run only on old pandas / Python 2:
#   - `.ix` was removed in pandas 1.0 (use .loc for labels, .iloc for positions);
#   - `join_axes=` was removed from pd.concat (reindex the result instead);
#   - `pd.core.common.rands` no longer exists;
#   - `xrange` is Python-2-only (`range` in Python 3).
# Translating `.ix` safely needs the index dtype of `d0` (defined elsewhere) — confirm before modernising.
d=d0.copy(); pd.concat([d[:2],d[2:5],d[5:]]) # rows
pd.concat([d.ix[:,'A':'B'],d.ix[1:3,'C':'D']],axis=1) # cols; note df_single_col=TimeSeries
# o1=pd.concat([p1,p2,p3],keys=['first','second','third'],join='outer') # generates hierarchial_multi-index (multi-ix order matters); can use multiple_keys,dict etc;
d=pd.DataFrame(randn(10,4),columns=['a','b','c','d'],index=[pd.core.common.rands(5) for _ in xrange(10)]) # rand_strings
pd.concat([d.ix[:7,['a','b']],d.ix[2:-2,['c']],d.ix[-7:,['d']]],axis=1,join_axes=[d.index]) # ix_orig (othw ix_sorted)
pd.concat([d.ix[:7,['a','b']],d.ix[2:-2,['c']],d.ix[-7:,['d']]],join='inner')
# add_row/col,copy,reindex,sql-like merge,fill_nan
# NOTE(review): DataFrame.append / Series.append were removed in pandas 2.0
# (use pd.concat). `dt`, `d4`, and `d2` are assumed to be defined elsewhere
# in the original file (dt presumably a DatetimeIndex) — confirm.
ts2=pd.Series([1,3,5,np.nan,6,8],index=dt[:6]); d.append([d.ix[1,],d.ix[0,]]); d.append(ts2.T,ignore_index=True); # d is NOT modified; append rows broken???
d.loc[:,'d']=np.array([5]*len(d)); d['g']=ts2[0:4] # cols; data outside of "master date list" is lost
d5=d.copy(); d6=d4.pop('C'); del d['g'] # pop removes column 'C' from d4 in place and returns it
d.insert(1,'bar',d['b']) # args posn,lbl,data
d6=d.reindex(index=dt[[0,1,4]],columns=list(d.columns)+['E']) # can modify row/col names (can extract data and construct new df);
d.rename(columns={'one' : 'foo','two' : 'bar'},index={'a' : 'apple','b' : 'banana','d' : 'durian'}) # rename (returns a new frame; d unchanged)
# pd.DataFrame(np.asarray(d),index=new_index,columns=new_cols); # inefficient but works; d.index=xx; d.columns=xx; d.name=xx;
d7=pd.DataFrame({'key':['fo','fo'],'val1':[1,2]}); d8=pd.DataFrame({'key':['fo','fo'],'val2':[4,5]}); pd.merge(d7,d8,on='key') # sql-like merge,very high eff;
d.combine_first(d2) # ~fill_nan pref1,pref2, ~d(isnan(d))=d2(isnan(d));

# process nan — boolean masking and NaN handling demos.
d[0<d]; # NaN's if no data (whole-frame mask: cells failing the condition become NaN)
d[0<d.a]; d[0<d.iloc[:,0]]; # d(d(:,1)<0,:) select rows (MATLAB-style analogy in the comment)
d.dropna(how='any'); d.fillna(value=5); pd.isnull(d) # drop rows with any NaN / fill NaNs / locate NaNs (all return new objects)
# f=lambda x:x.fillna(x.mean()); grp=xx; d3=grp.transform(f) # fill with grp mean