Example #1
    def groups(self):
        # Lazily build and cache the mapping of group key -> group labels
        # the first time it is requested.
        if self._groups is None:
            axis = self._group_axis
            self._groups = tseries.groupby(axis, self.grouper,
                                           output=GroupDict())

        return self._groups
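
Throughout these examples, `tseries.groupby(labels, mapper)` is the Cython helper that applies `mapper` to each label and collects the labels under the resulting group key; `output` lets the caller supply the dict-like container to fill (here a `GroupDict`). As orientation only, and assuming that contract rather than restating the Cython implementation, a pure-Python stand-in could look like the sketch below (the real helper also folds all NaN keys into a single null group, as the test example later shows; that detail is glossed over here):

def groupby_reference(labels, mapper, output=None):
    # Illustrative sketch of the assumed contract: bucket each label under
    # the key produced by mapper(label), filling the supplied container.
    result = {} if output is None else output
    for label in labels:
        result.setdefault(mapper(label), []).append(label)
    return result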
Example #2
def bench_groupby():
    # Benchmark the pure-Python grouping path against the Cython tseries
    # helpers on 10,000 object-dtype keys (1,000 groups of 10).
    N = 200

    arr = np.arange(10000).astype(object)
    values = np.random.randn(10000)
    keys = arr // 10
    d = dict(zip(arr, keys))

    f = lambda: groupby_nocython(arr, d.get)
    print 'no cython: %.2f ms per iteration' % (_timeit(f, n=N) * 1000)

    f = lambda: tseries.arrmap(arr, d.get)
    timing = _timeit(f, n=N) * 1000
    print 'arrmap: %.2f ms per iteration' % timing

    # Subtract the arrmap time so only the isnull pass itself is reported.
    f = lambda: isnull(tseries.arrmap(arr, d.get))
    print 'isnull: %.2f ms per iteration' % (_timeit(f, n=N) * 1000 - timing)

    f = lambda: tseries.groupby(arr, d.get)
    print 'groupby: %.2f ms per iteration' % (_timeit(f, n=N) * 1000)

    f = lambda: tseries.groupby_indices(arr, d.get)
    print 'groupby_inds: %.2f ms per iteration' % (_timeit(f, n=N) * 1000)

    def _test():
        # End-to-end group mean: integer positions from groupby_indices
        # feed ndarray.take, then np.mean is applied per group.
        groups = tseries.groupby_indices(arr, d.get)

        result = {}
        for k, v in groups.iteritems():
            result[k] = np.mean(values.take(v))

        return result

    print 'test: %.2f ms per iteration' % (_timeit(_test, n=N) * 1000)
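
The `_test` closure shows the intended shape of `groupby_indices`: it is assumed to return a dict of group key -> integer positions into the original array, so group-wise aggregation reduces to `ndarray.take` plus a reducer. A minimal sketch under that assumption (not the Cython implementation):

import numpy as np

def groupby_indices_reference(labels, mapper):
    # Sketch of the assumed contract: record each label's position under
    # the key produced by mapper(label).
    indices = {}
    for i, label in enumerate(labels):
        indices.setdefault(mapper(label), []).append(i)
    return indices

def group_means(values, indices):
    # Same aggregation pattern as _test above: mean of each group's values.
    values = np.asarray(values)
    return {key: np.mean(values.take(positions))
            for key, positions in indices.items()}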
Example #3
def test_groupby():
    # Keys 3, 4 and 7 map to NaN, so they should all land in a single
    # null group.
    mapping = Series({
        1 : 2.,
        2 : 2.,
        3 : np.NaN,
        4 : np.NaN,
        5 : 3.,
        6 : 3.,
        7 : np.NaN
    })

    index = Index([1, 2, 3, 4, 5, 6, 7])

    expected = {
        2 : [1, 2],
        3 : [5, 6],
        np.NaN : [3, 4, 7]
    }

    def compare_with_null(d1, d2):
        # NaN keys never compare equal, so the null group on each side is
        # set aside and the two null groups are compared once at the end.
        d1_nulls = None
        d2_nulls = None
        for k, v in d1.iteritems():
            if _isnan(k):
                d1_nulls = v
            else:
                assert(k in d2)
                assert(np.array_equal(v, d2[k]))

        for k, v in d2.iteritems():
            if _isnan(k):
                d2_nulls = v
            else:
                assert(k in d1)

        if d1_nulls is not None or d2_nulls is not None:
            assert(np.array_equal(d1_nulls, d2_nulls))

    grouped = tseries.groupby(index, mapping.get)
    compare_with_null(grouped, expected)
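
The `compare_with_null` helper is needed because NaN keys generally cannot be matched by value: NaN never compares equal to itself, so a NaN key in one dict will not be found in the other with `in` or `==` unless it happens to be the very same object. A quick illustration of the underlying behaviour:

import numpy as np

nan_key = float('nan')
assert nan_key != nan_key                  # NaN is never equal to itself
assert np.isnan(nan_key)                   # identify nulls with isnan instead
assert nan_key not in {float('nan'): 1}    # a distinct NaN object misses dict lookup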