'an array of daily returns from price array' g = np.zeros_like(prices) g[1:] = (prices[1:]-prices[:-1])/prices[:-1] return g def volume_code(volume): 'code the continuous volume data categorically' ind = np.searchsorted([1e5,1e6, 5e6,10e6, 1e7], volume) return ind summaryfuncs = ( ('date', lambda x: [thisdate.year for thisdate in x], 'years'), ('date', lambda x: [thisdate.month for thisdate in x], 'months'), ('date', lambda x: [thisdate.weekday() for thisdate in x], 'weekday'), ('adj_close', daily_return, 'dreturn'), ('volume', volume_code, 'volcode'), ) rsum = mlab.rec_summarize(r, summaryfuncs) stats = ( ('dreturn', len, 'rcnt'), ('dreturn', np.mean, 'rmean'), ('dreturn', np.median, 'rmedian'), ('dreturn', np.std, 'rsigma'), ) print 'summary by years' ry = mlab.rec_groupby(rsum, ('years',), stats) print mlab. rec2txt(ry) print 'summary by months' rm = mlab.rec_groupby(rsum, ('months',), stats) print mlab.rec2txt(rm) print 'summary by year and month' rym = mlab.rec_groupby(rsum, ('years','months'), stats) print mlab.rec2txt(rym)
return ind # a list of (dtype_name, summary_function, output_dtype_name). # rec_summarize will call on each function on the indicated recarray # attribute, and the result assigned to output name in the return # record array. summaryfuncs = ( ('date', lambda x: [thisdate.year for thisdate in x], 'years'), ('date', lambda x: [thisdate.month for thisdate in x], 'months'), ('date', lambda x: [thisdate.weekday() for thisdate in x], 'weekday'), ('adj_close', daily_return, 'dreturn'), ('volume', volume_code, 'volcode'), ) rsum = mlab.rec_summarize(r, summaryfuncs) # stats is a list of (dtype_name, function, output_dtype_name). # rec_groupby will summarize the attribute identified by the # dtype_name over the groups in the groupby list, and assign the # result to the output_dtype_name stats = ( ('dreturn', len, 'rcnt'), ('dreturn', np.mean, 'rmean'), ('dreturn', np.median, 'rmedian'), ('dreturn', np.std, 'rsigma'), ) # you can summarize over a single variable, like years or months print 'summary by years' ry = mlab.rec_groupby(rsum, ('years', ), stats)