Ejemplo n.º 1
0
def test_transform_transformation_func(request, transformation_func):
    """Compare ``groupby("A").transform(kernel)`` with a per-group reference.

    The reference is built by applying the same kernel to each group's
    ``"B"`` column separately and concatenating the pieces in row order.

    Parameters
    ----------
    request
        pytest ``request`` object, used to attach an xfail marker for
        ``"tshift"``.
    transformation_func : str
        Name of the transformation kernel under test.
    """
    # GH 30918
    df = DataFrame(
        {
            "A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"],
            "B": [1, 2, np.nan, 3, 3, np.nan, 4],
        },
        index=date_range("2020-01-01", "2020-01-07"),
    )
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    transformation_func = maybe_normalize_deprecated_kernels(
        transformation_func)

    if transformation_func == "tshift":
        msg = ("Current behavior of groupby.tshift is inconsistent with other "
               "transformations. See GH34452 for more details")
        request.node.add_marker(pytest.mark.xfail(reason=msg))

    if transformation_func == "cumcount":
        test_op = lambda x: x.transform("cumcount")
        mock_op = lambda x: Series(range(len(x)), x.index)
    elif transformation_func == "fillna":
        test_op = lambda x: x.transform("fillna", value=0)
        mock_op = lambda x: x.fillna(value=0)
    else:
        # "tshift" deliberately lands here as well: with both callables
        # bound, the xfail above is triggered by the actual comparison
        # rather than by an UnboundLocalError on ``test_op``.
        test_op = lambda x: x.transform(transformation_func)
        mock_op = lambda x: getattr(x, transformation_func)()

    result = test_op(df.groupby("A"))
    # Rows are already ordered by group: 4 x "foo", 2 x "bar", 1 x "baz".
    groups = [df[["B"]].iloc[:4], df[["B"]].iloc[4:6], df[["B"]].iloc[6:]]
    expected = concat([mock_op(g) for g in groups])

    # The cumcount branch builds Series on both sides; the others use frames.
    if transformation_func == "cumcount":
        tm.assert_series_equal(result, expected)
    else:
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 2
0
def test_transform_groupby_kernel_frame(axis, float_frame, op):
    """``DataFrame.transform(op)`` must match a one-group groupby transform.

    The comparison runs twice: on the frame as provided, and again after a
    column ``"E"`` has been appended so the frame's manager holds more than
    one array.
    """
    # GH 35964
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)

    extra_args = [0.0] if op == "fillna" else []
    # Position within ``shape`` that gives the number of group keys needed.
    shape_pos = 0 if axis in (0, "index") else 1

    def check_against_single_group():
        # A constant key puts every label along ``axis`` into one group.
        # The length is re-read on each call because the frame grows below.
        keys = np.ones(float_frame.shape[shape_pos])
        grouped = float_frame.groupby(keys, axis=axis)
        expected = grouped.transform(op, *extra_args)
        result = float_frame.transform(op, axis, *extra_args)
        tm.assert_frame_equal(result, expected)

    check_against_single_group()

    # same thing, but ensuring we have multiple blocks
    assert "E" not in float_frame.columns
    float_frame["E"] = float_frame["A"].copy()
    assert len(float_frame._mgr.arrays) > 1

    check_against_single_group()
Ejemplo n.º 3
0
def test_transform_axis_1(request, transformation_func):
    """Grouped transform along ``axis=1`` must equal the transposed result.

    The expected value is obtained by transposing the frame, running the
    same grouped transform on it, and transposing back.
    """
    # GH 36308

    # TODO(2.0) Remove after pad/backfill deprecation enforced
    transformation_func = maybe_normalize_deprecated_kernels(
        transformation_func)

    is_tshift = transformation_func == "tshift"
    if is_tshift:
        request.node.add_marker(
            pytest.mark.xfail(reason="tshift is deprecated"))
    expected_warning = FutureWarning if is_tshift else None

    if transformation_func == "fillna":
        extra = ("ffill", )
    else:
        extra = ()

    frame = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"])
    with tm.assert_produces_warning(expected_warning):
        by_columns = frame.groupby([0, 0, 1], axis=1)
        result = by_columns.transform(transformation_func, *extra)

        by_rows_of_transpose = frame.T.groupby([0, 0, 1])
        expected = by_rows_of_transpose.transform(transformation_func, *extra).T

    if transformation_func in ("diff", "shift"):
        # Result contains nans, so transpose coerces to float
        expected["b"] = expected["b"].astype("int64")

    # cumcount returns Series; the rest are DataFrame
    tm.assert_equal(result, expected)
Ejemplo n.º 4
0
def test_groupby_preserves_subclass(obj, groupby_func):
    """Groupby kernels must hand back the same subclass they were given.

    Also checks that calling the kernel directly and routing it through
    ``.agg()`` produce equal results.
    """
    # GH28330 -- preserve subclass through groupby operations

    if groupby_func == "corrwith" and isinstance(obj, Series):
        pytest.skip("Not applicable")
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    groupby_func = maybe_normalize_deprecated_kernels(groupby_func)
    grouped = obj.groupby(np.arange(0, 10))

    # Groups should preserve subclass type
    first_group = grouped.get_group(0)
    assert isinstance(first_group, type(obj))

    # Positional arguments that particular kernels are called with.
    if groupby_func in ("fillna", "nth"):
        call_args = [0]
    elif groupby_func == "corrwith":
        call_args = [obj]
    elif groupby_func == "tshift":
        call_args = [0, 0]
    else:
        call_args = []

    direct = getattr(grouped, groupby_func)(*call_args)
    via_agg = grouped.agg(groupby_func, *call_args)

    # Reduction or transformation kernels should preserve type; for a
    # DataFrame input these kernels are expected to give the sliced type.
    gives_sliced = isinstance(obj, DataFrame) and groupby_func in (
        "ngroup",
        "cumcount",
        "size",
    )
    expected_type = obj._constructor_sliced if gives_sliced else type(obj)
    assert isinstance(direct, expected_type)

    # Confirm .agg() groupby operations return same results
    if isinstance(direct, DataFrame):
        compare = tm.assert_frame_equal
    else:
        compare = tm.assert_series_equal
    compare(direct, via_agg)
Ejemplo n.º 5
0
def test_transform_groupby_kernel_frame(
    axis, float_frame, op, using_array_manager, request
):
    """``DataFrame.transform(op)`` must match a one-group groupby transform.

    Two passes are made: one on the frame as provided, and one after a
    column ``"E"`` is appended so the manager holds more than one array.
    ``pct_change`` along the columns is marked xfail under ArrayManager.
    """
    # GH 35964
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)

    along_columns = axis in (1, "columns")
    if using_array_manager and op == "pct_change" and along_columns:
        # TODO(ArrayManager) shift with axis=1
        xfail_mark = pytest.mark.xfail(
            reason="shift axis=1 not yet implemented for ArrayManager"
        )
        request.node.add_marker(xfail_mark)

    if op == "fillna":
        extra_args = [0.0]
    else:
        extra_args = []
    along_index = axis == 0 or axis == "index"

    for add_extra_column in (False, True):
        if add_extra_column:
            # same thing, but ensuring we have multiple blocks
            assert "E" not in float_frame.columns
            float_frame["E"] = float_frame["A"].copy()
            assert len(float_frame._mgr.arrays) > 1

        n_rows, n_cols = float_frame.shape
        # One constant key per label, so everything lands in a single group.
        keys = np.ones(n_rows if along_index else n_cols)
        expected = float_frame.groupby(keys, axis=axis).transform(
            op, *extra_args
        )
        result = float_frame.transform(op, axis, *extra_args)
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 6
0
def test_transform_groupby_kernel_series(string_series, op):
    """``Series.transform(op)`` must match a one-group groupby transform."""
    # GH 35964
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)

    if op == "fillna":
        extra_args = [0.0]
    else:
        extra_args = []

    # A constant key for every row places the whole Series in one group.
    single_group_keys = np.ones(len(string_series))
    grouped = string_series.groupby(single_group_keys)
    expected = grouped.transform(op, *extra_args)
    result = string_series.transform(op, 0, *extra_args)
    tm.assert_series_equal(result, expected)
Ejemplo n.º 7
0
def test_transform_groupby_kernel_series(request, string_series, op):
    """``Series.transform(op)`` must match a one-group groupby transform.

    ``"ngroup"`` is marked as an expected ``ValueError`` failure.
    """
    # GH 35964
    if op == "ngroup":
        ngroup_xfail = pytest.mark.xfail(
            raises=ValueError, reason="ngroup not valid for NDFrame"
        )
        request.node.add_marker(ngroup_xfail)
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)

    if op == "fillna":
        extra_args = [0.0]
    else:
        extra_args = []

    # A constant key for every row places the whole Series in one group.
    single_group_keys = np.ones(len(string_series))
    grouped = string_series.groupby(single_group_keys)
    expected = grouped.transform(op, *extra_args)
    result = string_series.transform(op, 0, *extra_args)
    tm.assert_series_equal(result, expected)