Esempio n. 1
0
def test_lazy_functions():
    """Check the polars aggregation helpers in both of their call forms.

    Each helper is evaluated twice on column ``"b"``: once as an expression
    built from the column name and selected through ``df[[...]]``, and once
    by passing the ``df["b"]`` series directly. Both results must match the
    expected value.
    """
    df = pl.DataFrame({
        "a": ["foo", "bar", "2"],
        "b": [1, 2, 3],
        "c": [1.0, 2.0, 3.0],
    })

    out = df[[pl.count("a")]]
    assert out[0] == 3
    assert pl.count(df["a"]) == 3

    # (aggregation helper, expected result for b == [1, 2, 3]).
    # A helper's position in this list is also its column position in `out`,
    # so the expression list and the assertions cannot drift apart.
    cases = [
        (pl.var, 1.0),
        (pl.std, 1.0),
        (pl.max, 3),
        (pl.min, 1),
        (pl.sum, 6),
        (pl.mean, 2),
        (pl.median, 2),
        (pl.n_unique, 3),
        (pl.first, 1),
        (pl.last, 3),
    ]

    out = df[[agg("b") for agg, _ in cases]]
    for idx, (agg, expected) in enumerate(cases):
        # Expression form: the aggregated column at the matching position.
        assert np.isclose(out[idx], expected), f"{agg.__name__}: expression form"
        # Series form: the same helper applied directly to the column data.
        assert np.isclose(agg(df["b"]), expected), f"{agg.__name__}: series form"
Esempio n. 2
0
def test_lazy_functions():
    """Check the polars aggregation helpers in both of their call forms.

    Each helper is applied to column ``"b"`` as an expression (selected via
    ``df[[...]]`` and read back with ``select_at_idx``) and directly to the
    ``df["b"]`` series; both results are compared with the expected value.
    """
    df = pl.DataFrame({
        "a": ["foo", "bar", "2"],
        "b": [1, 2, 3],
        "c": [1.0, 2.0, 3.0],
    })

    out = df[[pl.count("a")]]
    assert out["a"] == 3
    assert pl.count(df["a"]) == 3

    # One row per aggregation: (helper, expected result for b == [1, 2, 3]).
    # The row order defines the column order of `out` below.
    expectations = [
        (pl.var, 1.0),
        (pl.std, 1.0),
        (pl.max, 3),
        (pl.min, 1),
        (pl.sum, 6),
        (pl.mean, 2),
        (pl.median, 2),
        (pl.n_unique, 3),
        (pl.first, 1),
        (pl.last, 3),
    ]

    out = df[[helper("b") for helper, _ in expectations]]
    for position, (helper, expected) in enumerate(expectations):
        # Expression form: column at the same position as the helper's row.
        assert np.isclose(out.select_at_idx(position), expected), (
            f"{helper.__name__}: expression form"
        )
        # Series form: the helper called on the column data itself.
        assert np.isclose(helper(df["b"]), expected), (
            f"{helper.__name__}: series form"
        )
Esempio n. 3
0
File: main.py Progetto: ghuls/polars
# --- q5: sum v1, v2 and v3 within each "id6" group -------------------------
# NOTE(review): `x`, `t0` and `t0easy` are defined outside this fragment;
# `x` presumably is a lazy frame, since the chain ends in .collect() — confirm.
print("q5")
out = x.groupby("id6").agg([pl.sum("v1"),
                            pl.sum("v2"),
                            pl.sum("v3")]).collect()
# Seconds elapsed since t0 was last set.
print(time.time() - t0)
print("out.shape", out.shape)
# Totals of two of the three aggregated columns (the v3 sum is not printed).
# NOTE(review): no alias is given above, so the "_sum" suffix presumably comes
# from this polars version's default naming of aggregated columns — confirm.
print('out["v1_sum"].sum()', out["v1_sum"].sum())
print('out["v2_sum"].sum()', out["v2_sum"].sum())
# Time elapsed since t0easy, then start a separate timer named t0advanced.
# NOTE(review): presumably q5 closes the "easy" group of queries — confirm.
easy_time = time.time() - t0easy
t0advanced = time.time()

# --- q6: median and standard deviation of v3 per ("id4", "id5") group ------
t0 = time.time()
print("q6")
out = (x.groupby(["id4", "id5"]).agg(
    [pl.median("v3").alias("v3_median"),
     pl.std("v3").alias("v3_std")]).collect())
print(time.time() - t0)
print("out.shape", out.shape)
# Totals of both aggregated columns.
print('out["v3_median"].sum()', out["v3_median"].sum())
print('out["v3_std"].sum()', out["v3_std"].sum())

# --- q7: per "id3" group, max(v1) minus min(v2) as "range_v1_v2" -----------
t0 = time.time()
print("q7")
out = (x.groupby("id3").agg([(pl.max("v1") - pl.min("v2")).alias("range_v1_v2")
                             ]).collect())
print(time.time() - t0)
print("out.shape", out.shape)
print('out["range_v1_v2"].sum()', out["range_v1_v2"].sum())

# --- q8: timer restarted; the query itself lies beyond this fragment -------
t0 = time.time()
print("q8")
Esempio n. 4
0
 def stdize_out(value: str, control_for: str) -> pl.Expr:
     """Build an expression that standardizes ``value`` within groups.

     The mean and standard deviation of ``value`` are each taken over
     ``control_for``; the result is ``(value - mean) / std``.

     Args:
         value: Name of the column to standardize.
         control_for: Name passed to ``.over(...)`` for both statistics.

     Returns:
         The polars expression for the standardized column.
     """
     group_mean = pl.mean(value).over(control_for)
     group_std = pl.std(value).over(control_for)
     centered = pl.col(value) - group_mean
     return centered / group_std
from .dataset import df
import polars as pl
from polars import col

# Rebind `df` to the result of selecting aggregate expressions over the
# "random" column; each expression is aliased to its output column name.
df = df[[
    pl.sum("random").alias("sum"),
    pl.min("random").alias("min"),
    pl.max("random").alias("max"),
    # Same maximum, written in method form on a column expression instead
    # of through the pl.max helper.
    col("random").max().alias("other_max"),
    pl.std("random").alias("std dev"),
    pl.var("random").alias("variance"),
]]