Example #1
0
# stat,grouping
# d and ts come from earlier in the sheet (presumably a DataFrame and a Series — verify).
# Mean along axis 1; mean etc. exclude missing data.
d.mean(1)
# Occurrences of each distinct value.
ts.value_counts()
# Cumulative sum via apply.
d.apply(np.cumsum)
# Spread (max minus min) via apply.
d.apply(lambda col: col.max() - col.min())

# Small frame with two label columns (A, B) and two random value columns (C, D).
d9 = pd.DataFrame({
    'A': ['fo', 'ba', 'fo', 'ba', 'fo', 'ba', 'fo', 'fo'],
    'B': ['a', 'a', 'b', 'c', 'b', 'b', 'a', 'c'],
    'C': randn(8),
    'D': randn(8),
})
# Sum the value columns within each (A, B) pair.
d9.groupby(['A', 'B']).sum()
# Subtract column 'a' from d along axis 0; also math ops and &|
d.sub(d['a'], axis=0)
# Mean per quantile bucket: edges 0/.25/.5/.75/1 give four bins (quartiles).
d9 = pd.Series(np.random.randn(100))
factor = pd.qcut(d9, [0, .25, .5, .75, 1.])
d9.groupby(factor).mean()
# d.groupby(level=['A','B']); df.groupby(fn1,axis=1).groups
# Group 1000 random values by a randomly assigned country label.
ctry = np.array(['US', 'UK', 'GR', 'JP'])
key = ctry[np.random.randint(0, 4, 1000)]
# randn needs integer dimensions; the float literal 1e3 raises TypeError on current NumPy.
d2 = pd.DataFrame(randn(1000), index=key)
grp = d2.groupby(key)
# Per-group row count, mean, and standard deviation (agg accepts any reducer).
grp.count()
grp.mean()
grp.agg(lambda x: x.std())
# Summary statistics for the 'JP' rows only; grp.get_group('JP').describe() selects the same rows.
d2[key == 'JP'].apply(lambda x: x.describe())
# Grouping key and the mapping from group label to row positions.
grp.keys
grp.indices

# filter_on_group (A(0<A) etc),apply(f)
d = pd.DataFrame({
    'A': np.arange(8),
    'B': ['a', 'a', 'b', 'b', 'b', 'b', 'c', 'c'],
})
# Keep only the groups of B that have more than two rows.
# NOTE(review): F is not defined in this snippet — presumably an alias for False; verify.
d.groupby('B').filter(lambda x: len(x) > 2, dropna=F)  # (lambda x:2<x.sum()); drop unwanted data
def f(grp):
    """Return a DataFrame pairing grp ('original') with grp minus its mean ('demeaned')."""
    centered = grp - grp.mean()
    return pd.DataFrame({'original': grp, 'demeaned': centered})
# Work on a copy of d0 (defined elsewhere; assumed to have six rows and a 'C' column — verify).
d = d0.copy()
# Assign a group id to every row, then run f on column C within each group.
d['A'] = [1, 1, 2, 2, 3, 3]
d.groupby('A')['C'].apply(f)
def f(x):
    """Return a Series holding x and its square, labelled 'x' and 'x^2'."""
    labels = ['x', 'x^2']
    values = [x, x ** 2]
    return pd.Series(values, index=labels)
# Five uniform random numbers; f is called on each element via apply.
s = pd.Series(np.random.rand(5))
s.apply(f)
# silent dropping irrelevant cols (e.g. std([char,float]))

import pandas.util.testing as tm; tm.N=3
def unpivot(frame):
    """Reshape a wide frame into long (date, variable, value) records.

    The values are flattened column by column ('F' order). Each column label
    is repeated once per row and the index is tiled once per column, so the
    three arrays line up element for element.
    """
    n_rows, n_cols = frame.shape
    values = frame.values.ravel('F')
    variables = np.asarray(frame.columns).repeat(n_rows)
    dates = np.tile(np.asarray(frame.index), n_cols)
    long_form = {'value': values, 'variable': variables, 'date': dates}
    return pd.DataFrame(long_form, columns=['date', 'variable', 'value'])
# Build long-format sample data from the pandas testing helper frame.
# NOTE(review): pandas.util.testing was deprecated in pandas 1.0 and the make* helpers
# are gone from recent releases — confirm this still runs on the target pandas version.
d4 = unpivot(tm.makeTimeDataFrame())
# pivot~regroup,chg idx: 'date' becomes the index, 'variable' the columns.
d4.pivot(index='date', columns='variable', values='value')
# Add a second value column (will get 2nd df).
d4['value2'] = d4['value'] * 2

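# Panel = 3-D container of DataFrames: items (list of DataFrames) / major_axis (rows) / minor_axis (columns)
# NOTE: Panel was removed in pandas 0.25; use a MultiIndex DataFrame instead on newer versions.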