# NOTE(review): this section relies on names bound earlier in the file and not
# visible here (d, dt, d0, d2, ts, randn) -- confirm they are in scope.

# --- Inserting columns, reindexing, renaming --------------------------------
d.insert(1, 'bar', d['b'])  # args: position, label, data
d6 = d.reindex(index=dt[[0, 1, 4]], columns=list(d.columns) + ['E'])

# Row/column names can be modified (or extract the data and build a new frame).
# The result of rename() is not assigned here; this line is a usage example.
d.rename(
    columns={'one': 'foo', 'two': 'bar'},
    index={'a': 'apple', 'b': 'banana', 'd': 'durian'},
)
# Alternative (inefficient but works):
#   pd.DataFrame(np.asarray(d), index=new_index, columns=new_cols)
# Also assignable directly: d.index = xx; d.columns = xx; d.name = xx

# --- Merging -----------------------------------------------------------------
d7 = pd.DataFrame({'key': ['fo', 'fo'], 'val1': [1, 2]})
d8 = pd.DataFrame({'key': ['fo', 'fo'], 'val2': [4, 5]})
pd.merge(d7, d8, on='key')  # SQL-like merge, very efficient
# Prefer values from d, fall back to d2 where d is NaN;
# MATLAB-style: d(isnan(d)) = d2(isnan(d))
d.combine_first(d2)

# --- Handling NaN ------------------------------------------------------------
d[0 < d]  # NaN where the condition does not hold
d[0 < d.a]
d[0 < d.iloc[:, 0]]  # select rows; MATLAB-style: d(0 < d(:,1), :)
d.dropna(how='any')
d.fillna(value=5)
pd.isnull(d)
# Fill with the group mean:
#   f = lambda x: x.fillna(x.mean()); grp = xx; d3 = grp.transform(f)

# --- Statistics and grouping -------------------------------------------------
d.mean(axis=1)  # mean etc. exclude missing data
ts.value_counts()
d.apply(np.cumsum)
d.apply(lambda x: x.max() - x.min())

d9 = pd.DataFrame({
    'A': ['fo', 'ba', 'fo', 'ba', 'fo', 'ba', 'fo', 'fo'],
    'B': ['a', 'a', 'b', 'c', 'b', 'b', 'a', 'c'],
    'C': randn(8),
    'D': randn(8),
})
d9.groupby(['A', 'B']).sum()
d.sub(d['a'], axis=0)  # subtract column 'a' from every column; math ops and & | also work

# Mean per quartile bucket (bin edges 0, .25, .5, .75, 1 are quartiles).
d9 = pd.Series(np.random.randn(100))
factor = pd.qcut(d9, [0, .25, .5, .75, 1.])
d9.groupby(factor).mean()
# Other grouping forms:
#   d.groupby(level=['A', 'B']); df.groupby(fn1, axis=1).groups

ctry = np.array(['US', 'UK', 'GR', 'JP'])
key = ctry[np.random.randint(0, 4, 1000)]
# randn needs an integer dimension; the float literal 1e3 raises TypeError.
d2 = pd.DataFrame(randn(1000), index=key)
grp = d2.groupby(key)
grp.count()
grp.mean()
grp.agg(lambda x: x.std())
d2[key == 'JP'].apply(lambda x: x.describe())
# Alternative noted by the author: grp['JP'].apply(lambda x: x.describe())
grp.keys  # attribute lookups only; results are discarded
grp.indices

# --- Filtering on groups (MATLAB-style A(0 < A)), and apply(f) ---------------
d = pd.DataFrame({'A': np.arange(8), 'B': list('aabbbbcc')})
# Keep only groups with more than two rows; dropna=False is passed explicitly.
# Another predicate: lambda x: 2 < x.sum()
d.groupby('B').filter(lambda x: 2 < len(x), dropna=False)


def f(grp):
    """Return a frame pairing a group's values with their demeaned values.

    Args:
        grp: the object handed over by ``GroupBy.apply``; it must support
            ``.mean()`` and subtraction.

    Returns:
        A ``pd.DataFrame`` with columns ``'original'`` (``grp`` as given) and
        ``'demeaned'`` (``grp - grp.mean()``).
    """
    return pd.DataFrame({'original': grp, 'demeaned': grp - grp.mean()})


d = d0.copy()
d['A'] = [1, 1, 2, 2, 3, 3]  # NOTE(review): assumes d0 has exactly six rows -- confirm
d.groupby('A')['C'].apply(f)