def test_transform_transformation_func(request, transformation_func):
    """Compare ``groupby("A").transform(kernel)`` with a per-group computation.

    The expected value is built by applying the kernel to the "B" rows of
    each group separately and concatenating the pieces.
    """
    # GH 30918
    frame = DataFrame(
        {
            "A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"],
            "B": [1, 2, np.nan, 3, 3, np.nan, 4],
        },
        index=date_range("2020-01-01", "2020-01-07"),
    )
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    transformation_func = maybe_normalize_deprecated_kernels(transformation_func)

    if transformation_func == "cumcount":

        def transform_grouped(gb):
            return gb.transform("cumcount")

        def per_group(piece):
            return Series(range(len(piece)), piece.index)

    elif transformation_func == "fillna":

        def transform_grouped(gb):
            return gb.transform("fillna", value=0)

        def per_group(piece):
            return piece.fillna(value=0)

    elif transformation_func == "tshift":
        msg = (
            "Current behavior of groupby.tshift is inconsistent with other "
            "transformations. See GH34452 for more details"
        )
        request.node.add_marker(pytest.mark.xfail(reason=msg))
        # NOTE: neither callable is bound in this branch, so the call below
        # raises and that failure is what satisfies the xfail marker.
    else:

        def transform_grouped(gb):
            return gb.transform(transformation_func)

        def per_group(piece):
            return getattr(piece, transformation_func)()

    result = transform_grouped(frame.groupby("A"))

    # Rows 0-3 are "foo", rows 4-5 are "bar", row 6 is "baz".
    row_slices = (slice(None, 4), slice(4, 6), slice(6, None))
    pieces = [frame[["B"]].iloc[rows] for rows in row_slices]
    expected = concat([per_group(piece) for piece in pieces])

    # cumcount yields a Series; every other kernel yields a DataFrame.
    if transformation_func == "cumcount":
        compare = tm.assert_series_equal
    else:
        compare = tm.assert_frame_equal
    compare(result, expected)
def test_transform_groupby_kernel_frame(axis, float_frame, op):
    """Check ``DataFrame.transform(op)`` against a single-group groupby transform.

    The comparison runs twice: once on ``float_frame`` as given and once after
    adding a column "E" so that the frame holds more than one array.
    """
    # NOTE(review): another definition with this same name appears later in
    # this view; if both live in one module the later one replaces this one
    # -- confirm.
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)

    # GH 35964
    extra_args = [0.0] if op == "fillna" else []
    along_index = axis in (0, "index")

    def compare_with_single_group():
        # A constant key of ones puts every label on `axis` into one group.
        n_labels = float_frame.shape[0] if along_index else float_frame.shape[1]
        keys = np.ones(n_labels)
        expected = float_frame.groupby(keys, axis=axis).transform(op, *extra_args)
        result = float_frame.transform(op, axis, *extra_args)
        tm.assert_frame_equal(result, expected)

    compare_with_single_group()

    # same thing, but ensuring we have multiple blocks
    assert "E" not in float_frame.columns
    float_frame["E"] = float_frame["A"].copy()
    assert len(float_frame._mgr.arrays) > 1

    # The key length is recomputed inside the helper because the frame just
    # gained a column.
    compare_with_single_group()
def test_transform_axis_1(request, transformation_func):
    """Check that a column-wise groupby transform matches the transposed route.

    ``df.groupby(keys, axis=1).transform(...)`` is compared with grouping the
    transposed frame, transforming, and transposing back.
    """
    # GH 36308
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    transformation_func = maybe_normalize_deprecated_kernels(transformation_func)

    expected_warning = None
    if transformation_func == "tshift":
        expected_warning = FutureWarning
        request.node.add_marker(pytest.mark.xfail(reason="tshift is deprecated"))

    extra_args = ()
    if transformation_func == "fillna":
        extra_args = ("ffill",)

    frame = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"])

    with tm.assert_produces_warning(expected_warning):
        column_grouped = frame.groupby([0, 0, 1], axis=1)
        result = column_grouped.transform(transformation_func, *extra_args)

    row_grouped = frame.T.groupby([0, 0, 1])
    expected = row_grouped.transform(transformation_func, *extra_args).T

    if transformation_func in ("diff", "shift"):
        # Result contains nans, so transpose coerces to float
        expected["b"] = expected["b"].astype("int64")

    # cumcount returns Series; the rest are DataFrame
    tm.assert_equal(result, expected)
def test_groupby_preserves_subclass(obj, groupby_func):
    """Check that groupby kernels return the same subclass as the input.

    Each kernel is invoked both as a groupby method and by name through
    ``.agg``; the two results must be equal.
    """
    # GH28330 -- preserve subclass through groupby operations

    if isinstance(obj, Series) and groupby_func == "corrwith":
        pytest.skip("Not applicable")

    # TODO(2.0) Remove after pad/backfill deprecation enforced
    groupby_func = maybe_normalize_deprecated_kernels(groupby_func)

    grouped = obj.groupby(np.arange(0, 10))

    # Groups should preserve subclass type
    assert isinstance(grouped.get_group(0), type(obj))

    # Positional arguments passed to the kernels that are given some here.
    if groupby_func in ("fillna", "nth"):
        args = [0]
    elif groupby_func == "corrwith":
        args = [obj]
    elif groupby_func == "tshift":
        args = [0, 0]
    else:
        args = []

    via_method = getattr(grouped, groupby_func)(*args)
    via_agg = grouped.agg(groupby_func, *args)

    # Reduction or transformation kernels should preserve type
    sliced_kernels = ("ngroup", "cumcount", "size")
    if isinstance(obj, DataFrame) and groupby_func in sliced_kernels:
        expected_type = obj._constructor_sliced
    else:
        expected_type = type(obj)
    assert isinstance(via_method, expected_type)

    # Confirm .agg() groupby operations return same results
    if isinstance(via_method, DataFrame):
        compare = tm.assert_frame_equal
    else:
        compare = tm.assert_series_equal
    compare(via_method, via_agg)
def test_transform_groupby_kernel_frame(
    axis, float_frame, op, using_array_manager, request
):
    """Check ``DataFrame.transform(op)`` against a single-group groupby transform.

    The comparison runs twice: once on ``float_frame`` as given and once after
    adding a column "E" so that the frame holds more than one array.
    ``pct_change`` along the columns is marked xfail when
    ``using_array_manager`` is truthy.
    """
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)

    # GH 35964
    if using_array_manager and op == "pct_change" and axis in (1, "columns"):
        # TODO(ArrayManager) shift with axis=1
        xfail_mark = pytest.mark.xfail(
            reason="shift axis=1 not yet implemented for ArrayManager"
        )
        request.node.add_marker(xfail_mark)

    extra_args = [0.0] if op == "fillna" else []
    along_index = axis in (0, "index")

    def compare_with_single_group():
        # A constant key of ones puts every label on `axis` into one group.
        n_labels = float_frame.shape[0] if along_index else float_frame.shape[1]
        keys = np.ones(n_labels)
        expected = float_frame.groupby(keys, axis=axis).transform(op, *extra_args)
        result = float_frame.transform(op, axis, *extra_args)
        tm.assert_frame_equal(result, expected)

    compare_with_single_group()

    # same thing, but ensuring we have multiple blocks
    assert "E" not in float_frame.columns
    float_frame["E"] = float_frame["A"].copy()
    assert len(float_frame._mgr.arrays) > 1

    # The key length is recomputed inside the helper because the frame just
    # gained a column.
    compare_with_single_group()
def test_transform_groupby_kernel_series(string_series, op):
    """Check ``Series.transform(op)`` against a single-group groupby transform."""
    # NOTE(review): another definition with this same name appears later in
    # this view; if both live in one module the later one replaces this one
    # -- confirm.
    # GH 35964
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)

    extra_args = []
    if op == "fillna":
        extra_args = [0.0]

    # A constant key of ones puts every row into one group.
    single_group_keys = np.ones(len(string_series))
    grouped = string_series.groupby(single_group_keys)

    expected = grouped.transform(op, *extra_args)
    result = string_series.transform(op, 0, *extra_args)
    tm.assert_series_equal(result, expected)
def test_transform_groupby_kernel_series(request, string_series, op):
    """Check ``Series.transform(op)`` against a single-group groupby transform.

    The "ngroup" kernel is marked xfail with an expected ``ValueError``.
    """
    # GH 35964
    if op == "ngroup":
        xfail_mark = pytest.mark.xfail(
            raises=ValueError, reason="ngroup not valid for NDFrame"
        )
        request.node.add_marker(xfail_mark)

    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)

    extra_args = []
    if op == "fillna":
        extra_args = [0.0]

    # A constant key of ones puts every row into one group.
    single_group_keys = np.ones(len(string_series))
    grouped = string_series.groupby(single_group_keys)

    expected = grouped.transform(op, *extra_args)
    result = string_series.transform(op, 0, *extra_args)
    tm.assert_series_equal(result, expected)