Example #1
    def groups(self):
        # Lazily build and cache the mapping of group key -> group labels
        # the first time it is requested.
        if self._groups is None:
            axis = self._group_axis
            self._groups = tseries.groupby(axis, self.grouper,
                                           output=GroupDict())

        return self._groups
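
Throughout these examples, `tseries.groupby(labels, mapper)` is the Cython helper that applies `mapper` to each label and collects the labels under the resulting group key; `output` lets the caller supply the dict-like container to fill (here a `GroupDict`). As orientation only, and assuming that contract rather than restating the Cython implementation, a pure-Python stand-in could look like the sketch below (the real helper also folds all NaN keys into a single null group, as the test example later shows; that detail is glossed over here):

def groupby_reference(labels, mapper, output=None):
    # Illustrative sketch of the assumed contract: bucket each label under
    # the key produced by mapper(label), filling the supplied container.
    result = {} if output is None else output
    for label in labels:
        result.setdefault(mapper(label), []).append(label)
    return result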
Example #2
def bench_groupby():
    # Benchmark the pure-Python grouping path against the Cython tseries
    # helpers on 10,000 object-dtype keys (1,000 groups of 10).
    N = 200

    arr = np.arange(10000).astype(object)
    values = np.random.randn(10000)
    keys = arr // 10
    d = dict(zip(arr, keys))

    f = lambda: groupby_nocython(arr, d.get)
    print 'no cython: %.2f ms per iteration' % (_timeit(f, n=N) * 1000)

    f = lambda: tseries.arrmap(arr, d.get)
    timing = _timeit(f, n=N) * 1000
    print 'arrmap: %.2f ms per iteration' % timing

    # Subtract the arrmap time so only the isnull pass itself is reported.
    f = lambda: isnull(tseries.arrmap(arr, d.get))
    print 'isnull: %.2f ms per iteration' % (_timeit(f, n=N) * 1000 - timing)

    f = lambda: tseries.groupby(arr, d.get)
    print 'groupby: %.2f ms per iteration' % (_timeit(f, n=N) * 1000)

    f = lambda: tseries.groupby_indices(arr, d.get)
    print 'groupby_inds: %.2f ms per iteration' % (_timeit(f, n=N) * 1000)

    def _test():
        # End-to-end group mean: integer positions from groupby_indices
        # feed ndarray.take, then np.mean is applied per group.
        groups = tseries.groupby_indices(arr, d.get)

        result = {}
        for k, v in groups.iteritems():
            result[k] = np.mean(values.take(v))

        return result

    print 'test: %.2f ms per iteration' % (_timeit(_test, n=N) * 1000)
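
The `_test` closure shows the intended shape of `groupby_indices`: it is assumed to return a dict of group key -> integer positions into the original array, so group-wise aggregation reduces to `ndarray.take` plus a reducer. A minimal sketch under that assumption (not the Cython implementation):

import numpy as np

def groupby_indices_reference(labels, mapper):
    # Sketch of the assumed contract: record each label's position under
    # the key produced by mapper(label).
    indices = {}
    for i, label in enumerate(labels):
        indices.setdefault(mapper(label), []).append(i)
    return indices

def group_means(values, indices):
    # Same aggregation pattern as _test above: mean of each group's values.
    values = np.asarray(values)
    return {key: np.mean(values.take(positions))
            for key, positions in indices.items()}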
Example #3
def test_groupby():
    # Keys 3, 4 and 7 map to NaN, so they should all land in a single
    # null group.
    mapping = Series({
        1 : 2.,
        2 : 2.,
        3 : np.NaN,
        4 : np.NaN,
        5 : 3.,
        6 : 3.,
        7 : np.NaN
    })

    index = Index([1, 2, 3, 4, 5, 6, 7])

    expected = {
        2 : [1, 2],
        3 : [5, 6],
        np.NaN : [3, 4, 7]
    }

    def compare_with_null(d1, d2):
        # NaN keys never compare equal, so the null group on each side is
        # set aside and the two null groups are compared once at the end.
        d1_nulls = None
        d2_nulls = None
        for k, v in d1.iteritems():
            if _isnan(k):
                d1_nulls = v
            else:
                assert(k in d2)
                assert(np.array_equal(v, d2[k]))

        for k, v in d2.iteritems():
            if _isnan(k):
                d2_nulls = v
            else:
                assert(k in d1)

        if d1_nulls is not None or d2_nulls is not None:
            assert(np.array_equal(d1_nulls, d2_nulls))

    grouped = tseries.groupby(index, mapping.get)
    compare_with_null(grouped, expected)
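
The `compare_with_null` helper is needed because NaN keys generally cannot be matched by value: NaN never compares equal to itself, so a NaN key in one dict will not be found in the other with `in` or `==` unless it happens to be the very same object. A quick illustration of the underlying behaviour:

import numpy as np

nan_key = float('nan')
assert nan_key != nan_key                  # NaN is never equal to itself
assert np.isnan(nan_key)                   # identify nulls with isnan instead
assert nan_key not in {float('nan'): 1}    # a distinct NaN object misses dict lookup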