Python DataFrame.groupby Examples

Programming Language: Python

Namespace/Package Name: polars

Class/Type: DataFrame

Method/Function: groupby

Examples at hotexamples.com: 3

Python DataFrame.groupby - 3 examples found. These are the top-rated real-world Python examples of polars.DataFrame.groupby, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

DataFrame(30)

lazy(24)

frame_equal(14)

select(11)

drop(4)

write_ipc(4)

write_json(3)

select_at_idx(3)

null_count(3)

write_parquet(3)

groupby(3)

join(2)

write_csv(2)

write_avro(2)

shift(2)

tail(1)

downsample(1)

drop_in_place(1)

with_column(1)

vstack(1)

to_pandas(1)

to_dummies(1)

slice(1)

sort(1)

hstack(1)

select_idx(1)

fold(1)

clone(1)

row(1)

replace(1)

read_csv(1)

head(1)

melt(1)

rows(1)

Example #1

Show file

def test_custom_groupby():
    """Check result shapes when custom callables are applied to groupby output.

    Covers the eager ``groupby(...).select(...).apply(...)`` chain and the
    lazy ``groupby(...).agg([col(...).apply(...)])`` chain.
    """
    letters = DataFrame({"A": ["a", "a", "c", "c"], "B": [1, 3, 5, 2]})
    expected_shape = (2, 2)

    # Callable that returns ``.sum()`` of whatever apply hands it.
    summed = letters.groupby("A").select("B").apply(lambda grp: grp.sum())
    assert summed.shape == expected_shape

    # Callable that wraps its argument in a NumPy array and an unnamed Series.
    rebuilt = letters.groupby("A").select("B").apply(
        lambda grp: Series("", np.array(grp))
    )
    assert rebuilt.shape == expected_shape

    numbers = DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})

    # Lazy variant: the custom callable is attached to a column expression
    # and only evaluated on collect().
    lazy_groups = numbers.lazy().groupby("b")
    custom_sum = col("a").apply(lambda values: values.sum(), dtype_out=int)
    out = lazy_groups.agg([custom_sum]).collect()
    assert out.shape == (3, 2)

Example #2

Show file

File: test_df.py Project: thecodeempire/polars

def test_groupby():
    """Check keyword-style ``groupby(by=..., select=..., agg=...)`` results.

    Each case groups the frame by column "a", aggregates one selected column
    and compares the result with an expected frame whose value column is
    unnamed ("").
    """
    df = DataFrame(
        {
            "a": ["a", "b", "a", "b", "b", "c"],
            "b": [1, 2, 3, 4, 5, 6],
            "c": [6, 5, 4, 3, 2, 1],
        }
    )

    # (selected column, aggregation name, expected value for keys a, b, c).
    # The original listed the "min" case twice; it is checked once here.
    cases = [
        ("b", "sum", [4, 11, 6]),
        ("c", "sum", [10, 10, 1]),
        ("b", "min", [1, 2, 6]),
        ("b", "max", [3, 5, 6]),
        ("b", "mean", [2.0, (2 + 4 + 5) / 3, 6.0]),
    ]
    for column, agg, expected in cases:
        result = df.groupby(by="a", select=column, agg=agg)
        assert result.frame_equal(
            DataFrame({"a": ["a", "b", "c"], "": expected})
        ), f"unexpected groupby result for agg={agg!r} on column {column!r}"

    # TODO: is false because count is u32
    # Deliberately not asserted: the call is only run so that the "count"
    # aggregation and the comparison are exercised.
    df.groupby(by="a", select="b", agg="count").frame_equal(
        DataFrame({"a": ["a", "b", "c"], "": [2, 3, 1]})
    )

Example #3

Show file

def test_groupby():
    """Exercise the method-chaining groupby API on a small three-key frame.

    Covers column selection (``select`` and ``__getitem__``), the built-in
    aggregations, dict-style ``agg``, ``apply``, ``groups``, iteration over
    groups, ``get_group`` and passing lazy expressions to an eager ``agg``.
    """
    df = DataFrame({
        "a": ["a", "b", "a", "b", "b", "c"],
        "b": [1, 2, 3, 4, 5, 6],
        "c": [6, 5, 4, 3, 2, 1],
    })

    def expected(values):
        # Frame the aggregations are compared against: the three keys plus
        # an unnamed ("") value column.
        return DataFrame({"a": ["a", "b", "c"], "": values})

    def grouped(column):
        # Fresh groupby on "a" with a single column selected.
        return df.groupby("a").select(column)

    # use __getitem__ to map to select
    via_getitem = df.groupby("a")["b"].sum().sort(by="a")
    assert via_getitem.frame_equal(expected([4, 11, 6]))

    # Results are sorted by key before each comparison.
    assert grouped("b").sum().sort(by="a").frame_equal(expected([4, 11, 6]))
    assert grouped("c").sum().sort(by="a").frame_equal(expected([10, 10, 1]))
    assert grouped("b").min().sort(by="a").frame_equal(expected([1, 2, 6]))
    assert grouped("b").max().sort(by="a").frame_equal(expected([3, 5, 6]))
    assert grouped("b").mean().sort(by="a").frame_equal(
        expected([2.0, (2 + 4 + 5) / 3, 6.0]))
    assert grouped("b").last().sort(by="a").frame_equal(expected([3, 5, 6]))

    # check if it runs
    grouped("b").n_unique()
    grouped("b").quantile(0.3)
    grouped("b").agg_list()

    # Dict-style agg: only the two "b" output columns are checked by name.
    gb_df = df.groupby("a").agg({"b": ["sum", "min"], "c": "count"})
    for column_name in ("b_sum", "b_min"):
        assert column_name in gb_df.columns

    # TODO: a keyword-style count comparison
    # (groupby(by="a", select="b", agg="count") against [2, 3, 1]) is left
    # out here; it was noted as false because count is u32.

    summed_c = df.groupby("a").apply(lambda group: group[["c"]].sum())
    assert summed_c.sort("c")["c"][0] == 1

    group_keys = df.groupby("a").groups().sort("a")["a"]
    assert group_keys.series_equal(Series(["a", "b", "c"]))

    # Iterating the groupby yields sub-frames; only the "b" group is checked.
    for subdf in df.groupby("a"):
        if subdf["a"][0] != "b":
            continue
        assert subdf.shape == (3, 3)

    for key, n_rows in (("c", 1), ("b", 3), ("a", 2)):
        assert df.groupby("a").get_group(key).shape == (n_rows, 3)

    # Use lazy API in eager groupby
    lazy_style = df.groupby("a").agg([pl.sum("b")])
    assert lazy_style.shape == (3, 2)