Example #1
0
# Concatenate three row-slices of d, each restricted to different columns.
# .ix was removed in pandas 1.0; the integer slices are taken positionally via .iloc
# (assumes d has a non-integer, e.g. date, index so .ix was already positional — TODO confirm).
# NOTE(review): with the default axis=0, join='inner' keeps only the columns shared by all
# pieces (none here); the upstream pandas example passes axis=1 — confirm intent.
pd.concat([d.iloc[:7][['a', 'b']], d.iloc[2:-2][['c']], d.iloc[-7:][['d']]], join='inner')
# add_row/col, copy, reindex, sql-like merge, fill_nan
ts2 = pd.Series([1, 3, 5, np.nan, 6, 8], index=dt[:6])

# Appending rows builds a NEW frame; d itself is NOT modified (results are discarded here).
# DataFrame.append was removed in pandas 2.0 and .ix in pandas 1.0, so use pd.concat / .iloc
# (assumes d has a non-integer index so .ix[1,] / .ix[0,] were positional — TODO confirm).
pd.concat([d, d.iloc[[1, 0]]])
# ts2 is labelled by dt, not by d's column names, so the appended row is aligned on those
# labels rather than on d's columns — presumably why appending a Series looked "broken".
pd.concat([d, ts2.to_frame().T], ignore_index=True)

# Add/overwrite columns; data outside of the "master date list" (d's index) is lost.
d.loc[:, 'd'] = np.array([5] * len(d))
d['g'] = ts2[0:4]

# Copy the frame, pop a column out of d4 (returns the removed column), delete a column in place.
d5 = d.copy()
d6 = d4.pop('C')
del d['g']

d.insert(1, 'bar', d['b'])  # args: position, label, data

# reindex can change row/col labels (extracts data and constructs a new df).
d6 = d.reindex(index=dt[[0, 1, 4]], columns=list(d.columns) + ['E'])

# rename returns a new frame; d is left as is.
d.rename(
    columns={'one': 'foo', 'two': 'bar'},
    index={'a': 'apple', 'b': 'banana', 'd': 'durian'},
)
# pd.DataFrame(np.asarray(d),index=new_index,columns=new_cols); # inefficient but works; d.index=xx; d.columns=xx; d.name=xx;

# sql-like merge on a shared key column; very efficient.
d7 = pd.DataFrame({'key': ['fo', 'fo'], 'val1': [1, 2]})
d8 = pd.DataFrame({'key': ['fo', 'fo'], 'val2': [4, 5]})
pd.merge(d7, d8, on='key')

# ~fill_nan with preference d, then d2: ~d(isnan(d))=d2(isnan(d))
d.combine_first(d2)

# process nan
d[d > 0]  # element-wise mask: entries that fail the test come back as NaN
d[d.a > 0]  # select rows by column a; MATLAB analogue: d(d(:,1)>0,:)
d[d.iloc[:, 0] > 0]  # select rows by the first column, addressed by position
d.dropna(how='any')
d.fillna(value=5)
pd.isnull(d)
# f=lambda x:x.fillna(x.mean()); grp=xx; d3=grp.transform(f) # fill with grp mean

# stat,grouping
d.mean(axis=1)  # mean across columns for each row; mean etc. exclude missing data
ts.value_counts()
d.apply(np.cumsum)
d.apply(lambda col: col.max() - col.min())  # per-column range

# Toy frame: two label columns (A, B) and two random-normal value columns (C, D).
d9 = pd.DataFrame({
    'A': ['fo', 'ba', 'fo', 'ba', 'fo', 'ba', 'fo', 'fo'],
    'B': ['a', 'a', 'b', 'c', 'b', 'b', 'a', 'c'],
    'C': randn(8),
    'D': randn(8),
})
# Sum the value columns within each (A, B) combination.
d9.groupby(['A', 'B']).sum()
# Subtract column a from every column, matching on the row index; also math ops and & |
d.sub(d['a'], axis='index')
# Quartile means: bin 100 random draws at the 0/25/50/75/100% quantiles, then average each bin.
d9 = pd.Series(np.random.randn(100))
factor = pd.qcut(d9, [0, .25, .5, .75, 1.])
d9.groupby(factor).mean()
# d.groupby(level=['A','B']); df.groupby(fn1,axis=1).groups
# Group 1000 random draws by a randomly assigned country label.
ctry = np.array(['US', 'UK', 'GR', 'JP'])
key = ctry[np.random.randint(0, 4, 1000)]
# randn needs integer dimensions; the float literal 1e3 raises TypeError on current NumPy.
d2 = pd.DataFrame(randn(1000), index=key)

grp = d2.groupby(key)
grp.count()
grp.mean()
grp.agg(lambda x: x.std())
# Summary stats for the 'JP' rows only; the same rows are available via grp.get_group('JP').
d2[key == 'JP'].apply(lambda x: x.describe())
grp.keys
grp.indices