def copy(self):
    """Return a new FileContainer holding copies of this container's elements.

    Every entry of ``self.elements`` is a ``(size, element)`` pair.  StringIO
    elements are duplicated into a fresh StringIO with the same contents;
    any other element is duplicated through its own ``copy()`` method.  The
    per-element sizes and the container's total ``size`` are carried over
    unchanged.

    Returns:
        A FileContainer whose elements are independent copies of the
        originals, in the same order.
    """
    fc = FileContainer()

    for size, element in self.elements:
        if isinstance(element, StringIO):
            # Read the contents through the public getvalue() instead of the
            # private ``buf`` attribute: ``buf`` is an implementation detail
            # that can miss data still pending in the object's write list,
            # and it does not exist on io.StringIO.
            duplicate = StringIO(element.getvalue())
        else:
            duplicate = element.copy()

        fc.elements.append((size, duplicate))

    fc.size = self.size
    return fc
# Example #2
# 0
# Time-series notes (pandas Series on a daily DatetimeIndex).
# Author's remark: a Series can hold any type and indexes like an np.array
# (described by the author as an np.array subclass).
# `pd`, `np` and `randn` are not defined in the visible part of the file;
# presumably pandas, numpy and numpy.random.randn -- TODO confirm.
# NOTE(review): `how=` on resample, integer arithmetic on the index and
# `Series.order` look like early-pandas API -- confirm against the pandas
# version this file targets before running.
# Build nt daily timestamps starting at 2013-01-01.
nt=500; dt=pd.date_range('20130101',periods=nt,freq='D');
# Series of 1 + 3*randn(nt) on that index, named 'ts1', then shifted by 2 periods.
ts=pd.Series(1+randn(nt)*3,index=dt,name='ts1').shift(2); # related: tshift (time shift)
# Add 4 to the index, rename the series to '33', overwrite NaN entries with .01.
ts.index=ts.index+4; ts.name='33'; ts[np.isnan(ts)]=.01; # change index and name
# Date-window truncation, 5-minute resampling with a sum, hourly asfreq with
# 'pad' filling. The results are not assigned; the calls are listed for reference.
ts.truncate(before='20131031',after='20131231'); ts.resample('5Min',how='sum'); ts.asfreq('H',method='pad') # other options noted by the author: 'mean' 'pad' 'ohlc'
# Label-based slicing by timestamp strings and by year, then the sum of two
# offset slices of the same series.
ts['2013-01-01 08:00':'2013-01-31 08:19']; ts['2013']; ts[1:]+ts[:-1] # author's note: NaN is filled in automatically for missing data
# Plot the running sum; the string-accessor and sorting calls are listed for reference.
# NOTE(review): ts holds numeric values here, so `.str.lower()` is presumably
# only a reminder that the accessor exists -- confirm it is not meant to run.
ts.cumsum().plot(); ts.str.lower(); ts.order()

# DataFrame notes. Author's remarks: a DataFrame is roughly a list of
# columns / time series, and its integrated data alignment is considered a
# powerful feature.
# A DataFrame resembles a spreadsheet or SQL table, or an array with labels:
# it has an index (row labels) and columns, can be initialised from a dict,
# an ndarray or a Series, and fills gaps with NaN automatically.
# A structured (record) array is roughly an ndarray of tuples.
# Uses `ts`, `nt` and `dt` assigned earlier in the file, plus `pd`, `np`,
# `randn`, `plt` and `F`, none of which are defined in the visible chunk.
# Two columns built from ts: 'a' drops the last 10 points, 'b' is the running sum.
d2=pd.DataFrame({'a':ts[:nt-10],'b':ts.cumsum()}); # two curves, one with its tail clipped
# One-column frame 'A' of random values on an index with freq='T'.
# NOTE(review): nt is rebound to the float 1e5 and then used as a count for
# randn() and periods= -- confirm a float is accepted there.
nt=1e5; d3=pd.DataFrame(randn(nt,1),index=pd.date_range('20130101',periods=nt,freq='T'),columns=['A'])
# Frame built from mixed inputs: float scalar, Timestamp scalar, float32
# Series over range(4), int32 array of four 3s, and a string scalar.
d4=pd.DataFrame({'A':1.,'B':pd.Timestamp('20130102'),'C':pd.Series(1,index=range(4),dtype='float32'),'D':np.array([3]*4,dtype='int32'),'E':'foo'}) # author's note: matched on index, scalars stretched
# nt2 x 4 random frame indexed by the first nt2 entries of dt, columns A-D;
# d0 keeps a copy of it.
nt2=6; d=pd.DataFrame(np.random.randn(nt2,4),index=dt[0:nt2],columns=list('ABCD')); d0=d.copy()
# General inspection calls; apart from the two slice assignments, the results
# are discarded.
# NOTE(review): the two slice assignments write into d.index / d.columns in
# place -- confirm that item assignment on an Index is supported.
len(d); d.index[1:]=d.index[-1:0:-1]; d.columns[1:]=d.columns[-1:0:-1]; d.shape; d.head(); d.tail(3); d.values; d4.dtypes; d.T; d.describe(); # general functions; author's note: len(df) is the length of the date dimension; 'dt-lbl,f-lbl' kept as written
# NOTE(review): `F` is not defined in the visible chunk; presumably an alias
# for False -- confirm. `d.sort(columns=...)` looks like early-pandas API.
d.sort_index(axis=1,ascending=F); d.sort(columns='B'); # sort by row/column labels, or by the data in a column
d.to_string(); # string form, for display
# Plot d2 and place the legend with loc='best'.
d2.plot(); plt.legend(loc='best');

# DataFrame indexing notes. Author's summary: .iat/.at address a single
# element, .iloc/.loc address a subset, .ix is the general form; the right
# endpoint is excluded with integer positions and included with labels;
# integer labels such as [0:n] are possibly best avoided.
# Uses `d` and `dt` assigned earlier in the file. Except for the `.at`
# assignment, every statement below is an expression whose result is discarded.
# Plain [] slicing with integer positions and with date-string labels.
d[0:3],d['20130102':'20130104'] # author's note: selects rows only; d[1,3] breaks
# Column selection by key and by attribute.
d['A'],d.A # author's note: keeps the index and column label; '~d(:,mm('A',f))' kept as written
# Positional selection (rows 0:2, columns 0 and 2) and label selection
# (rows dt[0:2], columns 'A' and 'C').
d.iloc[0:2,[0,2]]; d.loc[dt[0:2],['A','C']]; # author's note: Python-style indexing (right end excluded) with control over each dimension; '~df(mm(idx_subset,idx),:)' kept as written
# Read one element by position; write -3 at row dt[0], column 'A'.
d.iat[0,3];        d.at[dt[0],'A']=-3 # single element access (author's note: 'ptr')
# Author's note: .ix = .loc + .iloc, accepting mixed labels and integers.
# Slices past the end: x is a 6-element Python list, d has fewer than 8 rows.
x=list('abcdef'); x[8:10]; d[8:10]; # author's note: out-of-range slice bounds are allowed in np; were forbidden (not anymore?) in a DataFrame

# Multi-index, converting index levels to columns, and pivot tables. Author's
# note: a pivot table is a summary table (table 2) set up graphically from a
# spreadsheet table (table 1); it pivots/rotates when the graphical setup changes.
# Sample frame for these notes: three 12-row categorical columns (A, B, C)
# and two 12-row columns of random values (D, E).
d5=pd.DataFrame(dict(
    A=['one','one','two','three']*3,
    B=['a','b','c']*4,
    C=(['foo']*3+['bar']*3)*2,
    D=np.random.randn(12),
    E=np.random.randn(12),
))