def test_series_grouper_requires_nonempty_raises():
    """SeriesGrouper raises ValueError when handed a zero-length series (GH#29500)."""
    values = Series(np.random.randn(10))
    # Zero-length positional slice of the series under test.
    empty = values.iloc[:0]
    group_codes = np.array([-1] * 3 + [0] * 3 + [1] * 4, dtype=np.intp)

    msg = "SeriesGrouper requires non-empty `series`"
    with pytest.raises(ValueError, match=msg):
        libreduction.SeriesGrouper(empty, np.mean, group_codes, 2)
def test_series_grouper():
    """Compare SeriesGrouper + np.mean against means and sizes of the labelled slices."""
    ser = Series(np.random.randn(10))

    # Positions 0-2 are labelled -1, positions 3-5 are group 0, positions 6-9 group 1.
    codes = np.array([-1] * 3 + [0] * 3 + [1] * 4, dtype=np.intp)

    sgrouper = libreduction.SeriesGrouper(ser, np.mean, codes, 2)
    result, counts = sgrouper.get_result()

    # Only the slices labelled 0 and 1 appear in the expected output.
    expected_means = np.array([ser[3:6].mean(), ser[6:].mean()])
    expected_counts = np.array([3, 4], dtype=np.int64)

    tm.assert_almost_equal(result, expected_means)
    tm.assert_almost_equal(counts, expected_counts)
Exemple #3
0
def test_series_grouper_result_length_difference():
    """
    GH 40014: run SeriesGrouper on a float64 series with an object-dtype
    index, using a reducer that returns a plain bool per group.
    """
    ser = Series(np.random.randn(10), dtype="float64")
    ser.index = ser.index.astype("O")
    codes = np.array([-1] * 3 + [0] * 3 + [1] * 4, dtype=np.intp)

    sgrouper = libreduction.SeriesGrouper(ser, lambda x: all(x > 0), codes, 2)
    result, counts = sgrouper.get_result()

    # One bool per group, stored in an object-dtype array.
    expected = np.array(
        [all(chunk > 0) for chunk in (ser[3:6], ser[6:])], dtype=object
    )
    tm.assert_equal(result, expected)

    expected_counts = np.array([3, 4], dtype=np.int64)
    tm.assert_equal(counts, expected_counts)
def test_series_grouper():
    """Check SeriesGrouper (variant taking a ``dummy`` slice) against slice means/sizes."""
    from pandas import Series

    ser = Series(np.random.randn(10))
    # Zero-length slice handed to SeriesGrouper as its final argument.
    empty = ser[:0]

    codes = np.array([-1] * 3 + [0] * 3 + [1] * 4, dtype=np.int64)

    sgrouper = reduction.SeriesGrouper(ser, np.mean, codes, 2, empty)
    result, counts = sgrouper.get_result()

    # Only the slices labelled 0 and 1 appear in the expected output.
    expected_means = np.array([ser[3:6].mean(), ser[6:].mean()])
    expected_counts = np.array([3, 4], dtype=np.int64)

    assert_almost_equal(result, expected_means)
    assert_almost_equal(counts, expected_counts)
Exemple #5
0
    def _aggregate_series_fast(self, obj, func):
        """
        Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

        By the time this is called it has already been checked that
        ``obj.index`` is not a MultiIndex and that ``obj`` is backed by an
        ndarray, not an ExtensionArray.

        Returns the ``(result, counts)`` pair produced by
        ``SeriesGrouper.get_result``.
        """
        func = self._is_builtin_func(func)

        codes, _, n_groups = self.group_info

        # Zero-length slice of the unsorted object, forwarded as SeriesGrouper's
        # last argument (avoids object / Series creation overhead).
        empty = obj._get_values(slice(None, 0))

        # Apply the same indexer to the values and to their group codes.
        order = get_group_index_sorter(codes, n_groups)
        sorted_obj = obj.take(order)
        sorted_codes = algorithms.take_nd(codes, order, allow_fill=False)

        result, counts = libreduction.SeriesGrouper(
            sorted_obj, func, sorted_codes, n_groups, empty
        ).get_result()
        return result, counts
Exemple #6
0
    def _aggregate_series_fast(self, obj, func):
        """
        Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

        Raises
        ------
        TypeError
            If ``obj.index._has_complex_internals`` is truthy.

        Returns the ``(result, counts)`` pair produced by
        ``SeriesGrouper.get_result``.
        """
        func = self._is_builtin_func(func)

        index = obj.index
        if index._has_complex_internals:
            raise TypeError("Incompatible index for Cython grouper")

        codes, _, n_groups = self.group_info

        # Zero-length slice taken before reordering; passed on as the
        # grouper's dummy (avoids object / Series creation overhead).
        empty = obj._get_values(slice(None, 0))

        order = get_group_index_sorter(codes, n_groups)
        reordered = obj.take(order)
        reordered_codes = algorithms.take_nd(codes, order, allow_fill=False)

        sgrouper = libreduction.SeriesGrouper(
            reordered, func, reordered_codes, n_groups, empty
        )
        result, counts = sgrouper.get_result()
        return result, counts
Exemple #7
0
    def _aggregate_series_fast(self, obj: Series, func: F) -> npt.NDArray[np.object_]:
        """
        Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

        Preconditions already verified by the caller:
          - ``obj.index`` is not a MultiIndex
          - ``obj`` is backed by an ndarray, not an ExtensionArray
          - ``len(obj) > 0``

        Only the first element of ``SeriesGrouper.get_result()`` is returned;
        the second element is discarded.
        """
        func = com.is_builtin_func(func)

        group_ids, _, n_groups = self.group_info

        # Reorder values and group ids with one shared indexer
        # (avoids object / Series creation overhead).
        order = get_group_index_sorter(group_ids, n_groups)
        reordered = obj.take(order)
        reordered_ids = group_ids.take(order)

        aggregated, _ = libreduction.SeriesGrouper(
            reordered, func, reordered_ids, n_groups
        ).get_result()
        return aggregated
Exemple #8
0
    def _aggregate_series_fast(self, obj: Series, func: F):
        """
        Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

        Preconditions already verified by the caller:
          - ``obj.index`` is not a MultiIndex
          - ``obj`` is backed by an ndarray, not an ExtensionArray
          - ``len(obj) > 0``
          - ``ngroups != 0``

        Returns the ``(result, counts)`` pair produced by
        ``SeriesGrouper.get_result``.
        """
        func = self._is_builtin_func(func)

        codes, _, n_groups = self.group_info

        # Reorder values and group codes with one shared indexer
        # (avoids object / Series creation overhead).
        order = get_group_index_sorter(codes, n_groups)
        reordered = obj.take(order)
        reordered_codes = codes.take(order)

        sgrouper = libreduction.SeriesGrouper(
            reordered, func, reordered_codes, n_groups
        )
        result, counts = sgrouper.get_result()
        return result, counts
Exemple #9
0
    def _aggregate_series_fast(self, obj, func):
        """
        Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

        Raises
        ------
        TypeError
            If ``obj.index._has_complex_internals`` is truthy.

        Returns the ``(result, counts)`` pair produced by
        ``SeriesGrouper.get_result``.
        """
        func = self._is_builtin_func(func)

        # TODO: pre-empt this; also pre-empt get_result raising TypeError if we
        #   pass an EA. For EAs backed by ndarray we may have a performant
        #   workaround.
        has_complex_index = obj.index._has_complex_internals
        if has_complex_index:
            raise TypeError("Incompatible index for Cython grouper")

        codes, _, n_groups = self.group_info

        # Zero-length slice of the unsorted object, supplied as the grouper's
        # dummy (avoids object / Series creation overhead).
        empty = obj._get_values(slice(None, 0))

        order = get_group_index_sorter(codes, n_groups)
        taken = obj.take(order)
        taken_codes = algorithms.take_nd(codes, order, allow_fill=False)

        result, counts = libreduction.SeriesGrouper(
            taken, func, taken_codes, n_groups, empty
        ).get_result()
        return result, counts