# Notes: concatenation, row/column add-remove, reindex/rename, merge, NaN handling, stats.
# (Rewritten: the original crammed everything onto one line, so every statement after the
# first '#' was dead comment text. Also migrated off APIs removed from pandas:
# .ix -> .loc/.iloc, join_axes= -> .reindex, pd.core.common.rands -> np.random.choice,
# DataFrame.append -> pd.concat, xrange -> range, TimeSeries -> Series.)

# --- concatenation ---
d = d0.copy()
pd.concat([d[:2], d[2:5], d[5:]])                      # split then restack rows
# column-wise concat; .ix was removed -- positional rows via .iloc, label cols via .loc
pd.concat([d.loc[:, 'A':'B'], d.iloc[1:3].loc[:, 'C':'D']], axis=1)
# NOTE: selecting a single column yields a Series, not a one-column DataFrame

# keys= builds a hierarchical MultiIndex on the result (key order matters);
# keys can also come from a dict, etc.
o1 = pd.concat([p1, p2, p3], keys=['first', 'second', 'third'], join='outer')

# random-string index (pd.core.common.rands was removed; build with numpy instead)
letters = list('abcdefghijklmnopqrstuvwxyz')
d = pd.DataFrame(randn(10, 4), columns=['a', 'b', 'c', 'd'],
                 index=[''.join(np.random.choice(letters, 5)) for _ in range(10)])
# join_axes= was removed in pandas 1.0 -- reindex the result to keep the original
# row order (otherwise the concat index comes out sorted/unioned)
pd.concat([d.iloc[:7][['a', 'b']], d.iloc[2:-2][['c']], d.iloc[-7:][['d']]],
          axis=1).reindex(d.index)
pd.concat([d.iloc[:7][['a', 'b']], d.iloc[2:-2][['c']], d.iloc[-7:][['d']]],
          join='inner')

# --- add / remove rows and columns ---
ts2 = pd.Series([1, 3, 5, np.nan, 6, 8], index=dt[:6])
# DataFrame.append was removed in pandas 2.0 -- use pd.concat; d is NOT modified
pd.concat([d, d.iloc[[1, 0]]])                         # append rows 1 and 0
pd.concat([d, ts2.to_frame().T], ignore_index=True)    # append a Series as a row
d.loc[:, 'd'] = np.array([5] * len(d))
d['g'] = ts2[0:4]                                      # values outside d's index are dropped
d5 = d.copy()
d6 = d4.pop('C')                                       # remove + return a column
del d['g']
d.insert(1, 'bar', d['b'])                             # args: position, label, data

# --- reindex / rename (row/col names can be changed, or a new df constructed) ---
d6 = d.reindex(index=dt[[0, 1, 4]], columns=list(d.columns) + ['E'])
d.rename(columns={'one': 'foo', 'two': 'bar'},
         index={'a': 'apple', 'b': 'banana', 'd': 'durian'})
# pd.DataFrame(np.asarray(d), index=new_index, columns=new_cols)  # works but inefficient
# d.index = xx; d.columns = xx; d.name = xx                       # direct assignment

# --- SQL-like merge ---
d7 = pd.DataFrame({'key': ['fo', 'fo'], 'val1': [1, 2]})
d8 = pd.DataFrame({'key': ['fo', 'fo'], 'val2': [4, 5]})
pd.merge(d7, d8, on='key')                             # SQL-style join, very efficient
d.combine_first(d2)                                    # fill d's NaNs from d2 (d wins)

# --- missing data ---
d[0 < d]                                               # NaN wherever condition fails
d[0 < d.a]; d[0 < d.iloc[:, 0]]                        # boolean row selection
d.dropna(how='any'); d.fillna(value=5); pd.isnull(d)
# fill NaN with each group's mean:
# f = lambda x: x.fillna(x.mean()); d3 = grp.transform(f)

# --- stats / apply ---
d.mean(axis=1); ts.value_counts()                      # stats skip missing data
d.apply(np.cumsum); d.apply(lambda x: x.max() - x.min())
# Notes: groupby aggregation, column-wise arithmetic, quantile bucketing.
# (Rewritten: the original juxtaposed statements with no separator --
# `...randn(8)}) d9.groupby(...)` -- which is a SyntaxError.)
d9 = pd.DataFrame({'A': ['fo', 'ba', 'fo', 'ba', 'fo', 'ba', 'fo', 'fo'],
                   'B': ['a', 'a', 'b', 'c', 'b', 'b', 'a', 'c'],
                   'C': randn(8), 'D': randn(8)})
d9.groupby(['A', 'B']).sum()                           # aggregate per (A, B) group
d.sub(d['a'], axis=0)                                  # subtract column 'a' from every
                                                       # column; other math ops and &| too
# bucket values and average per bucket; [0,.25,.5,.75,1.] gives QUARTILES
# (4 bins -- the original comment said "quintile", which would need 5 edges of 0.2)
d9 = pd.Series(np.random.randn(100))
factor = pd.qcut(d9, [0, .25, .5, .75, 1.])
d9.groupby(factor).mean()                              # per-quartile mean