def test_series_grouper_requires_nonempty_raises():
    """GH#29500: constructing SeriesGrouper on an empty series raises."""
    # Zero-length slice of a populated Series: this is the empty input under test.
    populated = Series(np.random.randn(10))
    empty = populated.iloc[:0]
    codes = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.intp)

    msg = "SeriesGrouper requires non-empty `series`"
    with pytest.raises(ValueError, match=msg):
        libreduction.SeriesGrouper(empty, np.mean, codes, 2)
def test_series_grouper():
    """Compare SeriesGrouper's np.mean results and counts with slice-wise means."""
    ser = Series(np.random.randn(10))
    codes = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.intp)

    result, counts = libreduction.SeriesGrouper(ser, np.mean, codes, 2).get_result()

    # Positions 3-5 carry label 0 and positions 6-9 carry label 1; the leading
    # three rows are labelled -1 and appear in neither expected value.
    mean_label0 = ser[3:6].mean()
    mean_label1 = ser[6:].mean()
    tm.assert_almost_equal(result, np.array([mean_label0, mean_label1]))

    tm.assert_almost_equal(counts, np.array([3, 4], dtype=np.int64))
def test_series_grouper_result_length_difference():
    """GH 40014: bool-returning reducer on a Series with an object-dtype index."""

    def all_positive(values):
        # Same reduction as the comparison used to build the expected array.
        return all(values > 0)

    ser = Series(np.random.randn(10), dtype="float64")
    ser.index = ser.index.astype("O")
    codes = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.intp)

    result, counts = libreduction.SeriesGrouper(ser, all_positive, codes, 2).get_result()

    # Label 0 covers positions 3-5, label 1 covers positions 6-9.
    expected = np.array(
        [all_positive(ser[3:6]), all_positive(ser[6:])],
        dtype=object,
    )
    tm.assert_equal(result, expected)

    tm.assert_equal(counts, np.array([3, 4], dtype=np.int64))
def test_series_grouper():
    """Compare SeriesGrouper (built with a dummy slice) against slice-wise means."""
    from pandas import Series

    ser = Series(np.random.randn(10))
    # Zero-length slice handed to SeriesGrouper as its trailing ``dummy`` argument.
    empty = ser[:0]
    codes = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64)

    series_grouper = reduction.SeriesGrouper(ser, np.mean, codes, 2, empty)
    result, counts = series_grouper.get_result()

    # Label 0 covers positions 3-5, label 1 covers positions 6-9.
    mean_label0 = ser[3:6].mean()
    mean_label1 = ser[6:].mean()
    assert_almost_equal(result, np.array([mean_label0, mean_label1]))

    assert_almost_equal(counts, np.array([3, 4], dtype=np.int64))
def _aggregate_series_fast(self, obj, func):
    """
    Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

    By the time this is called it has already been checked that
    ``obj.index`` is not a MultiIndex and that ``obj`` is backed by an
    ndarray, not an ExtensionArray.

    Returns the ``(result, counts)`` pair unpacked from
    ``SeriesGrouper.get_result()``.
    """
    func = self._is_builtin_func(func)

    ids, _, ngroups = self.group_info

    # Zero-length values taken from the not-yet-reordered object
    # (avoids object / Series creation overhead).
    empty = obj._get_values(slice(None, 0))

    # Reorder the values and their group ids with one shared indexer.
    sorter = get_group_index_sorter(ids, ngroups)
    reordered = obj.take(sorter)
    reordered_ids = algorithms.take_nd(ids, sorter, allow_fill=False)

    series_grouper = libreduction.SeriesGrouper(
        reordered, func, reordered_ids, ngroups, empty
    )
    result, counts = series_grouper.get_result()
    return result, counts
def _aggregate_series_fast(self, obj, func):
    """
    Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

    Raises
    ------
    TypeError
        If ``obj.index._has_complex_internals`` is truthy.

    Returns the ``(result, counts)`` pair unpacked from
    ``SeriesGrouper.get_result()``.
    """
    func = self._is_builtin_func(func)

    # Bail out before any reordering work for indexes this path rejects.
    index_is_complex = obj.index._has_complex_internals
    if index_is_complex:
        raise TypeError("Incompatible index for Cython grouper")

    ids, _, ngroups = self.group_info

    # Zero-length values taken from the not-yet-reordered object
    # (avoids object / Series creation overhead).
    empty = obj._get_values(slice(None, 0))

    # Reorder the values and their group ids with one shared indexer.
    sorter = get_group_index_sorter(ids, ngroups)
    reordered = obj.take(sorter)
    reordered_ids = algorithms.take_nd(ids, sorter, allow_fill=False)

    series_grouper = libreduction.SeriesGrouper(
        reordered, func, reordered_ids, ngroups, empty
    )
    result, counts = series_grouper.get_result()
    return result, counts
def _aggregate_series_fast(self, obj: Series, func: F) -> npt.NDArray[np.object_]:
    """
    Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

    Preconditions already checked before this is called:

    - ``obj.index`` is not a MultiIndex
    - ``obj`` is backed by an ndarray, not an ExtensionArray
    - ``len(obj) > 0``

    Only the first element of ``SeriesGrouper.get_result()`` is returned;
    the second element is discarded.
    """
    func = com.is_builtin_func(func)

    group_ids, _, ngroups = self.group_info

    # avoids object / Series creation overhead
    # Reorder the values and their group ids with one shared indexer.
    sorter = get_group_index_sorter(group_ids, ngroups)
    reordered = obj.take(sorter)
    reordered_ids = group_ids.take(sorter)

    series_grouper = libreduction.SeriesGrouper(reordered, func, reordered_ids, ngroups)
    result, _ = series_grouper.get_result()
    return result
def _aggregate_series_fast(self, obj: Series, func: F):
    """
    Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

    Preconditions already checked before this is called:

    - ``obj.index`` is not a MultiIndex
    - ``obj`` is backed by an ndarray, not an ExtensionArray
    - ``len(obj) > 0``
    - ``ngroups != 0``

    Returns the ``(result, counts)`` pair unpacked from
    ``SeriesGrouper.get_result()``.
    """
    func = self._is_builtin_func(func)

    ids, _, ngroups = self.group_info

    # avoids object / Series creation overhead
    # Reorder the values and their group ids with one shared indexer.
    sorter = get_group_index_sorter(ids, ngroups)
    reordered = obj.take(sorter)
    reordered_ids = ids.take(sorter)

    series_grouper = libreduction.SeriesGrouper(reordered, func, reordered_ids, ngroups)
    result, counts = series_grouper.get_result()
    return result, counts
def _aggregate_series_fast(self, obj, func):
    """
    Aggregate ``obj`` with ``func`` through ``libreduction.SeriesGrouper``.

    Raises
    ------
    TypeError
        If ``obj.index._has_complex_internals`` is truthy.

    Returns the ``(result, counts)`` pair unpacked from
    ``SeriesGrouper.get_result()``.
    """
    func = self._is_builtin_func(func)

    # TODO: pre-empt this, also pre-empt get_result raising TypeError if we
    #  pass an EA; for EAs backed by ndarray we may have a performant workaround
    index_is_complex = obj.index._has_complex_internals
    if index_is_complex:
        raise TypeError("Incompatible index for Cython grouper")

    ids, _, ngroups = self.group_info

    # Zero-length values taken from the not-yet-reordered object
    # (avoids object / Series creation overhead).
    empty = obj._get_values(slice(None, 0))

    # Reorder the values and their group ids with one shared indexer.
    sorter = get_group_index_sorter(ids, ngroups)
    reordered = obj.take(sorter)
    reordered_ids = algorithms.take_nd(ids, sorter, allow_fill=False)

    series_grouper = libreduction.SeriesGrouper(
        reordered, func, reordered_ids, ngroups, empty
    )
    result, counts = series_grouper.get_result()
    return result, counts