コード例 #1
0
def facet_pages(column):
    """Yield one titled text plot per group of ``mtcars``.

    ``mtcars`` is grouped by ``column``. For each group a plot is built that
    maps ``wt`` to x, ``mpg`` to y and ``name`` to the text label, and the
    group label is used as the plot title.
    """
    shared_layers = [
        aes(x='wt', y='mpg', label='name'),
        geom_text(),
    ]
    for group_label, subset in mtcars.groupby(column):
        # Compose left to right: data, then shared layers, then the title.
        page = ggplot(subset)
        page = page + shared_layers
        page = page + ggtitle(group_label)
        yield page
コード例 #2
0
def test_sorting_within_groups_head():
    """Sorting by ``qsec`` within ``cyl`` groups and taking ``tail(1)``.

    The expected frame is built with plain pandas: each ``cyl`` group is
    sorted by ``qsec`` and its last row is kept.
    """
    actual = (
        dp(mtcars)
        .groupby(X.cyl)
        .print()
        .sort_values("qsec")
        .tail(1)
        .pd
    )
    last_rows = [
        group.sort_values("qsec").tail(1)
        for _, group in mtcars.groupby("cyl")
    ]
    # Align the column order with whatever the pipeline produced.
    expected = pd.concat(last_rows)[actual.columns]
    assert_frame_equal(expected, actual)
コード例 #3
0
def test_filter_by_vector_grouped():
    """Grouped ``filter_by`` with a boolean vector built from ``X.hp.rank()``.

    The expectation collects, per ``cyl`` group, the names of rows whose
    ``hp`` rank is at most 2 and keeps exactly those rows of ``mtcars``.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        .filter_by(X.hp.rank() <= 2)
        .ungroup()
        .pd
    )
    kept_names = set()
    for _, group in mtcars.groupby("cyl"):
        is_low_rank = group["hp"].rank() <= 2
        kept_names.update(group["name"][is_low_rank])
    expected = mtcars[mtcars.name.isin(kept_names)]
    # Same set of columns first, then compare in the pipeline's column order.
    assert set(expected.columns) == set(actual.columns)
    expected = expected[actual.columns]
    assert_frame_equal(actual, expected)
コード例 #4
0
def test_grouped_mutate_returns_scalar():
    """Grouped ``mutate`` with a plain scalar value.

    The pipeline groups by ``cyl``, assigns the constant ``4`` to a new
    ``count`` column, selects ``count`` and ungroups. The expected frame
    holds ``cyl`` next to a constant ``count`` column on the ``mtcars``
    index.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        .mutate(count=4)
        .select("count")
        .ungroup()
        .pd.sort_index()
    )
    # The expectation is independent of the real group sizes: the scalar is
    # simply repeated for every row.
    should = ordered_DataFrame(
        {
            "cyl": mtcars.cyl,
            "count": 4,
        },
        index=mtcars.index,
    )
    assert_frame_equal(should, actual)
コード例 #5
0
def test_grouped_filter_by_returns_series():
    """Grouped ``filter_by`` fed a ``{group: boolean Series}`` mapping.

    For each group the mask keeps rows whose descending ``hp`` rank is at
    most 2; the expectation rebuilds the same selection with plain pandas.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        # The mapping is built inline so X.itergroups() is evaluated after
        # the groupby call, exactly where the argument appears in the chain.
        .filter_by({
            key: group.hp.rank(ascending=False) <= 2
            for (key, group) in X.itergroups()
        })
        .ungroup()
        .pd.sort_index()
    )
    kept_names = set()
    for _, group in mtcars.groupby("cyl"):
        is_top_rank = group["hp"].rank(ascending=False) <= 2
        kept_names.update(group["name"][is_top_rank])
    expected = mtcars[mtcars.name.isin(kept_names)]
    assert set(expected.columns) == set(actual.columns)
    expected = expected[actual.columns]
    assert_frame_equal(expected, actual)
コード例 #6
0
def test_groupby_two_mutate_grouped():
    """Grouped ``mutate`` over two keys (``cyl``, ``vs``) with per-group Series.

    ``grp_rank`` is the rank of ``hp`` inside each (``cyl``, ``vs``) group;
    the expectation concatenates the per-group ranks computed with pandas.
    """
    actual = (
        dp(mtcars)
        .groupby(["cyl", "vs"])
        # Built inline so X.itergroups() runs after the groupby call.
        .mutate(grp_rank={
            key: group.hp.rank()
            for (key, group) in X.itergroups()
        })
        .select("grp_rank")
        .ungroup()
        .pd.sort_index()
    )
    rank_parts = [
        group["hp"].rank()
        for _, group in mtcars.groupby(["cyl", "vs"])
    ]
    ranks = pd.concat(rank_parts)
    expected = mtcars.assign(grp_rank=ranks)[["cyl", "vs", "grp_rank"]]
    assert_frame_equal(expected, actual)
コード例 #7
0
def test_grouped_mutate_callable():
    """Grouped ``mutate`` whose value is a callable returning a scalar.

    ``max_hp`` is the maximum ``hp`` of each ``cyl`` group. Both frames are
    sorted by ``name`` before comparison.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        .mutate(max_hp=lambda x: x["hp"].max())
        .select(["cyl", "max_hp", "name"])
        .ungroup()
        .pd
    )
    # One constant Series per group, indexed like the group's rows.
    per_group_max = pd.concat([
        pd.Series(group["hp"].max(), index=group.index)
        for _, group in mtcars.groupby("cyl")
    ])
    wanted = ["cyl", "max_hp", "name"]
    expected = mtcars.assign(max_hp=per_group_max)[wanted].sort_values("name")
    assert_frame_equal(expected, actual.sort_values("name"))
コード例 #8
0
def test_grouped_mutate_returns_scalar_per_group_str():
    """Grouped ``mutate`` fed a ``{group: str}`` mapping.

    Each group's value is ``"X"`` followed by the number of rows in that
    group; the expectation derives the same label per row from pandas
    group counts.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        # Built inline so X.itergroups() runs after the groupby call.
        .mutate(count={
            key: "X" + str(len(group))
            for (key, group) in X.itergroups()
        })
        .select("count")
        .ungroup()
        .pd.sort_index()
    )
    group_sizes = mtcars.groupby("cyl").agg("count")["name"]
    row_labels = ["X" + str(group_sizes[cyl]) for cyl in mtcars.cyl]
    expected = ordered_DataFrame(
        {
            "cyl": mtcars.cyl,
            "count": row_labels,
        },
        index=mtcars.index,
    )
    assert_frame_equal(expected, actual)
コード例 #9
0
def test_interleaved_context_managers():
    """Run two nested ``dppd`` contexts whose pipeline steps alternate.

    One context wraps ``mtcars`` and the other ``diamonds``. Calls on
    ``dpX`` and ``dpY`` are issued alternately inside the inner ``with``
    block; after both blocks exit, ``X`` and ``Y`` are compared against
    frames computed with plain pandas.
    """
    with dppd(mtcars) as (dpX, X):
        with dppd(diamonds) as (dpY, Y):
            # The alternation between the two pipelines is the point of the
            # test, so the statement order here must not be changed.
            dpX.groupby("cyl")
            dpY.filter_by(Y.cut == "Ideal")
            dpX.summarize(("hp", np.mean, "mean_hp"))
            dpY.summarize(("price", np.max, "max_price"))
    # Expected for the mtcars pipeline: mean hp per cyl, with cyl as a column.
    should_X = (mtcars.groupby("cyl")[["hp"]].agg(
        np.mean).rename(columns={"hp": "mean_hp"})).reset_index()
    # Expected for the diamonds pipeline: a one-row frame holding the maximum
    # price among rows whose cut is "Ideal".
    should_Y = (pd.DataFrame(diamonds[diamonds.cut == "Ideal"].max()[[
        "price"
    ]]).transpose().rename(columns={"price": "max_price"}))
    # Cast to int before comparing.
    # NOTE(review): presumably needed because the column-wise max/transpose
    # yields an object dtype - confirm.
    should_Y["max_price"] = should_Y["max_price"].astype(int)
    # NOTE(review): X and Y are compared as frames here, so after the with
    # blocks they presumably expose each pipeline's final result - confirm
    # against the dppd implementation.
    assert_frame_equal(X, should_X)
    assert_frame_equal(Y, should_Y)
コード例 #10
0
def test_iter_tuples_in_group_by():
    """Grouped ``itertuples`` yields ``(key, rows)`` pairs matching pandas.

    The expectation maps each group key, wrapped in a 1-tuple, to the list
    of row tuples of that group.
    """
    actual = {
        key: list(rows)
        for (key, rows) in dp(mtcars).groupby("cyl").itertuples()
    }
    expected = {
        (key,): list(group.itertuples())
        for key, group in mtcars.groupby("cyl")
    }
    assert actual == expected
コード例 #11
0
def test_grouped_filter_by_X_apply():
    """Grouped ``filter_by`` with a per-group condition from ``X.apply``.

    Only ``cyl`` groups with more than 10 rows are kept; column order is
    not compared.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        .filter_by(X.apply(len) > 10)
        .ungroup()
        .pd
    )
    is_big_group = mtcars.groupby("cyl").apply(len) > 10
    kept_cyls = is_big_group.index[is_big_group]
    expected = mtcars[mtcars.cyl.isin(kept_cyls)]
    assert_frame_equal(expected, actual, check_column_order=False)
コード例 #12
0
def test_groupby_within_chain_select_on_group():
    """Selecting ``hp`` on a grouped pipeline and taking the mean.

    The expected frame is the per-``cyl`` mean of ``hp`` computed with
    plain pandas, indexed by ``cyl``.
    """
    actual = dp(mtcars).groupby("cyl").select("hp").mean().pd
    # Select the column before aggregating: averaging the whole frame also
    # touches every other column, and pandas >= 2.0 raises TypeError for
    # non-numeric ones instead of silently dropping them. The result is the
    # same single-column frame either way.
    should = mtcars.groupby("cyl")[["hp"]].mean()
    assert_frame_equal(should, actual)
コード例 #13
0
def test_basic_summary():
    """Grouped ``summarize`` applying ``len`` to ``hp`` as ``count``.

    The expectation counts ``hp`` per ``cyl`` group with pandas, names the
    result column ``count`` and turns ``cyl`` back into a regular column.
    """
    actual = dp(mtcars).groupby("cyl").summarize((X.hp, len, "count")).pd
    expected = (
        mtcars.groupby("cyl")[["hp"]]
        .agg("count")
        .rename(columns={"hp": "count"})
        .reset_index()
    )
    # NOTE(review): the author annotated this assertion with "will fail";
    # confirm whether that is still the expected outcome.
    assert_frame_equal(expected, actual)  # will fail
コード例 #14
0
def test_sorting_within_groups_head_ungroup():
    """``arrange`` within ``cyl`` groups, then ``ungroup`` and ``tail(1)``.

    After ungrouping, ``tail(1)`` yields a single row. The expectation is
    the last row, by ``qsec``, of the last group that
    ``mtcars.groupby("cyl")`` yields.
    """
    actual = dp(mtcars).groupby(X.cyl).arrange("qsec").ungroup().tail(1).pd
    # Only the final group contributes to the expectation, so pick it
    # explicitly instead of overwriting a variable once per group. This also
    # avoids an unbound name if there are no groups.
    groups = [sub_df for _, sub_df in mtcars.groupby("cyl")]
    last_group = groups[-1]
    should = last_group.sort_values("qsec").tail(1)[actual.columns]
    assert_frame_equal(should, actual)