def group_indices(self):
    """Return a dict mapping each group key to the integer positions
    of that group's members along the group axis.

    The mapping is computed lazily by ``tseries.groupby_indices`` on
    first access and cached on the instance thereafter.
    """
    if self._group_indices is None:
        # Compute once; subsequent calls hit the cache.
        self._group_indices = tseries.groupby_indices(self._group_axis,
                                                      self.grouper)
    return self._group_indices
def bench_groupby(): N = 200 arr = np.arange(10000).astype(object) values = np.random.randn(10000) keys = arr // 10 d = dict(zip(arr, keys)) f = lambda: groupby_nocython(arr, d.get) print 'no cython: %.2f ms per iteration' % (_timeit(f, n=N) * 1000) f = lambda: tseries.arrmap(arr, d.get) timing = _timeit(f, n=N) * 1000 print 'arrmap: %.2f ms per iteration' % timing f = lambda: isnull(tseries.arrmap(arr, d.get)) print 'isnull: %.2f ms per iteration' % (_timeit(f, n=N) * 1000 - timing) f = lambda: tseries.groupby(arr, d.get) print 'groupby: %.2f ms per iteration' % (_timeit(f, n=N) * 1000) f = lambda: tseries.groupby_indices(arr, d.get) print 'groupby_inds: %.2f ms per iteration' % (_timeit(f, n=N) * 1000) def _test(): groups = tseries.groupby_indices(arr, d.get) result = {} for k, v in groups.iteritems(): result[k] = np.mean(values.take(v)) return result print 'test: %.2f ms per iteration' % (_timeit(_test, n=N) * 1000)
def _test():
    """Group ``values`` by the positions from ``groupby_indices`` and
    return a dict of per-group means.

    NOTE(review): relies on module-level ``arr``, ``d`` and ``values``
    defined elsewhere in this file — confirm they exist at call time.
    """
    groups = tseries.groupby_indices(arr, d.get)
    return dict((key, np.mean(values.take(locs)))
                for key, locs in groups.iteritems())