def test_lazy_functions():
    """Check the aggregation helpers both as lazy expressions and on a Series.

    Every helper is exercised twice: once as an expression built from the
    column name and evaluated through ``df[[...]]``, and once called directly
    on the ``df["b"]`` Series. Both results must match the same expected value.
    """
    df = pl.DataFrame(
        {
            "a": ["foo", "bar", "2"],
            "b": [1, 2, 3],
            "c": [1.0, 2.0, 3.0],
        }
    )

    out = df[[pl.count("a")]]
    assert out[0] == 3
    assert pl.count(df["a"]) == 3

    # (helper, expected result on column "b" == [1, 2, 3]).
    # The order of this table fixes the column order of `out` below.
    cases = [
        (pl.var, 1.0),
        (pl.std, 1.0),
        (pl.max, 3),
        (pl.min, 1),
        (pl.sum, 6),
        (pl.mean, 2),
        (pl.median, 2),
        (pl.n_unique, 3),
        (pl.first, 1),
        (pl.last, 3),
    ]

    out = df[[func("b") for func, _ in cases]]

    for idx, (func, expected) in enumerate(cases):
        # Expression form: the idx-th entry of the selection result.
        assert np.isclose(out[idx], expected), f"lazy pl.{func.__name__}"
        # Eager form: the same helper applied directly to the Series.
        assert np.isclose(func(df["b"]), expected), f"eager pl.{func.__name__}"
def test_lazy_functions():
    """Check the aggregation helpers both as lazy expressions and on a Series.

    Every helper is exercised twice: once as an expression built from the
    column name and evaluated through ``df[[...]]``, and once called directly
    on the ``df["b"]`` Series. Both results must match the same expected value.
    """
    df = pl.DataFrame(
        {
            "a": ["foo", "bar", "2"],
            "b": [1, 2, 3],
            "c": [1.0, 2.0, 3.0],
        }
    )

    out = df[[pl.count("a")]]
    assert out["a"] == 3
    assert pl.count(df["a"]) == 3

    # (helper, expected result on column "b" == [1, 2, 3]).
    # The order of this table fixes the column order of `out` below.
    cases = [
        (pl.var, 1.0),
        (pl.std, 1.0),
        (pl.max, 3),
        (pl.min, 1),
        (pl.sum, 6),
        (pl.mean, 2),
        (pl.median, 2),
        (pl.n_unique, 3),
        (pl.first, 1),
        (pl.last, 3),
    ]

    out = df[[func("b") for func, _ in cases]]

    for idx, (func, expected) in enumerate(cases):
        # Expression form: fetched by position from the selection result.
        assert np.isclose(out.select_at_idx(idx), expected), (
            f"lazy pl.{func.__name__}"
        )
        # Eager form: the same helper applied directly to the Series.
        assert np.isclose(func(df["b"]), expected), f"eager pl.{func.__name__}"
# --- q5: per-"id6" sums of v1, v2 and v3 -----------------------------------
# `x`, `t0` and `t0easy` are defined before this excerpt.
# NOTE(review): presumably `x` is a lazy frame (the chain ends in .collect())
# and `t0` was reset just before this query -- confirm against the lines above.
print("q5")
out = x.groupby("id6").agg([pl.sum("v1"), pl.sum("v2"), pl.sum("v3")]).collect()
print(time.time() - t0)
print("out.shape", out.shape)
# No alias is given above, so these lookups rely on the aggregation naming its
# output columns "<column>_sum" -- confirm for the installed polars version.
print('out["v1_sum"].sum()', out["v1_sum"].sum())
print('out["v2_sum"].sum()', out["v2_sum"].sum())

# Time elapsed since `t0easy`; by its name this closes the "easy" query group
# and `t0advanced` opens the next one (assumption -- neither is read here).
easy_time = time.time() - t0easy
t0advanced = time.time()

# --- q6: median and standard deviation of v3 per (id4, id5) pair -----------
t0 = time.time()
print("q6")
out = (x.groupby(["id4", "id5"]).agg(
    [pl.median("v3").alias("v3_median"), pl.std("v3").alias("v3_std")]).collect())
print(time.time() - t0)
print("out.shape", out.shape)
print('out["v3_median"].sum()', out["v3_median"].sum())
print('out["v3_std"].sum()', out["v3_std"].sum())

# --- q7: max(v1) - min(v2) per "id3", exposed as "range_v1_v2" -------------
t0 = time.time()
print("q7")
out = (x.groupby("id3").agg([(pl.max("v1") - pl.min("v2")).alias("range_v1_v2")
                             ]).collect())
print(time.time() - t0)
print("out.shape", out.shape)
print('out["range_v1_v2"].sum()', out["range_v1_v2"].sum())

# --- q8: timer restarted; the query itself continues past this excerpt -----
t0 = time.time()
print("q8")
def stdize_out(value: str, control_for: str) -> pl.Expr:
    """Build an expression that standardizes ``value`` within ``control_for``.

    The result is ``(value - mean) / std``, where both the mean and the
    standard deviation of ``value`` are taken ``.over(control_for)``.

    Args:
        value: Name of the column to standardize.
        control_for: Name of the column passed to ``.over(...)``.

    Returns:
        The unevaluated polars expression. There is no guard here for a
        standard deviation of zero.
    """
    group_mean = pl.mean(value).over(control_for)
    group_std = pl.std(value).over(control_for)
    centered = pl.col(value) - group_mean
    return centered / group_std
import polars as pl
from polars import col

from .dataset import df

# Aggregations over the "random" column, each given an explicit output name.
# "max" and "other_max" request the same maximum through two spellings:
# the pl.max("random") helper and the col("random").max() method.
_summary_exprs = [
    pl.sum("random").alias("sum"),
    pl.min("random").alias("min"),
    pl.max("random").alias("max"),
    col("random").max().alias("other_max"),
    pl.std("random").alias("std dev"),
    pl.var("random").alias("variance"),
]

# Rebind `df` to the result of selecting those expressions.
df = df[_summary_exprs]