Esempio n. 1
0
def test_lazy_functions():
    """Check the polars aggregation helpers against known results.

    Each helper is exercised in two ways on the same data: called with a
    column name and selected from the frame via ``df[[...]]``, and called
    directly on the column ``df["b"]``. Both results must be close to the
    same expected value.
    """
    df = pl.DataFrame({
        "a": ["foo", "bar", "2"],
        "b": [1, 2, 3],
        "c": [1.0, 2.0, 3.0]
    })
    out = df[[pl.count("a")]]
    assert out[0] == 3
    assert pl.count(df["a"]) == 3

    # (helper, expected result for column "b" == [1, 2, 3]).
    # This single table drives both the selection and the assertions, so the
    # two cannot drift apart; the list position is the index used on `out`.
    checks = [
        (pl.var, 1.0),
        (pl.std, 1.0),
        (pl.max, 3),
        (pl.min, 1),
        (pl.sum, 6),
        (pl.mean, 2),
        (pl.median, 2),
        (pl.n_unique, 3),
        (pl.first, 1),
        (pl.last, 3),
    ]
    out = df[[func("b") for func, _ in checks]]

    for idx, (func, expected) in enumerate(checks):
        # The helper name is attached so a failure identifies the aggregation.
        assert np.isclose(out[idx], expected), func.__name__
        assert np.isclose(func(df["b"]), expected), func.__name__
Esempio n. 2
0
def test_lazy_functions():
    """Check the polars aggregation helpers against known results.

    Each helper is exercised in two ways on the same data: called with a
    column name and selected from the frame via ``df[[...]]``, and called
    directly on the column ``df["b"]``. Both results must be close to the
    same expected value.
    """
    df = pl.DataFrame({
        "a": ["foo", "bar", "2"],
        "b": [1, 2, 3],
        "c": [1.0, 2.0, 3.0]
    })
    out = df[[pl.count("a")]]
    assert out["a"] == 3
    assert pl.count(df["a"]) == 3

    # (helper, expected result for column "b" == [1, 2, 3]).
    # This single table drives both the selection and the assertions, so the
    # two cannot drift apart; the list position is the index passed to
    # `select_at_idx` below.
    checks = [
        (pl.var, 1.0),
        (pl.std, 1.0),
        (pl.max, 3),
        (pl.min, 1),
        (pl.sum, 6),
        (pl.mean, 2),
        (pl.median, 2),
        (pl.n_unique, 3),
        (pl.first, 1),
        (pl.last, 3),
    ]
    out = df[[func("b") for func, _ in checks]]

    for idx, (func, expected) in enumerate(checks):
        # The helper name is attached so a failure identifies the aggregation.
        assert np.isclose(out.select_at_idx(idx), expected), func.__name__
        assert np.isclose(func(df["b"]), expected), func.__name__
Esempio n. 3
0
def test_list_eval_expression() -> None:
    """Exercise ``arr.eval`` with ``parallel`` set to both True and False.

    Two things are checked for each setting: ranking inside the per-row
    lists built from columns "a" and "b", and evaluating ``pl.first()`` on
    column "a" reshaped with ``(1, -1)``.
    """
    df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})

    # Expected frame contents after adding the "rank" column.
    expected_ranked = {
        "a": [1, 8, 3],
        "b": [4, 5, 2],
        "rank": [[1.0, 2.0], [2.0, 1.0], [2.0, 1.0]],
    }

    for parallel in (True, False):
        rank_expr = pl.concat_list(["a", "b"]).arr.eval(
            pl.first().rank(),
            parallel=parallel,
        )
        ranked = df.with_column(rank_expr.alias("rank"))
        assert ranked.to_dict(False) == expected_ranked

        reshaped = df["a"].reshape((1, -1))
        evaluated = reshaped.arr.eval(pl.first(), parallel=parallel)
        assert evaluated.to_list() == [[1, 8, 3]]
Esempio n. 4
0
def test_window_function():
    """Check ``.over(...)`` window expressions, lazily and via ``df[[...]]``.

    The lazy query adds three windowed columns and asserts on "cars_max_B";
    the second check takes the first "B" over the combination of "fruits"
    and "cars".
    """
    df = pl.DataFrame({
        "A": [1, 2, 3, 4, 5],
        "fruits": ["banana", "banana", "apple", "apple", "banana"],
        "B": [5, 4, 3, 2, 1],
        "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
    })

    # Windowed aggregations, each aliased to its own output column.
    windowed_columns = [
        pl.sum("A").over("fruits").alias("fruit_sum_A"),
        pl.first("B").over("fruits").alias("fruit_first_B"),
        pl.max("B").over("cars").alias("cars_max_B"),
    ]
    out = df.lazy().with_columns(windowed_columns).collect()
    assert out["cars_max_B"] == [5, 4, 5, 5, 5]

    # Same kind of expression, but with two columns passed to `over`.
    first_b_per_pair = pl.first("B").over(["fruits", "cars"]).alias("B_first")
    out = df[[first_b_per_pair]]
    assert out["B_first"] == [5, 4, 3, 3, 5]
Esempio n. 5
0
import polars as pl

# Small example frame: two numeric columns ("A", "B") and two string
# columns ("fruits", "cars") that are used as window keys below.
dataset = pl.DataFrame(
    {
        "A": [1, 2, 3, 4, 5],
        "fruits": ["banana", "banana", "apple", "apple", "banana"],
        "B": [5, 4, 3, 2, 1],
        "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
    }
)

# Lazy query that adds three windowed columns, each aliased to a new name.
q = dataset.lazy().with_columns(
    [
        pl.sum("A").over("fruits").alias("fruit_sum_A"),
        pl.first("B").over("fruits").alias("fruit_first_B"),
        pl.max("B").over("cars").alias("cars_max_B"),
    ]
)

# Execute the lazy query.
df = q.collect()
Esempio n. 6
0
import polars as pl

from .dataset import dataset

# Group by first name, aggregate, then keep the five largest groups.
# NOTE(review): the sort key "party_count" is presumably the column name
# produced by `pl.count("party")` in this polars version — confirm.
q = (
    dataset.lazy()
    .groupby("first_name")
    .agg(
        [
            pl.count("party"),
            pl.col("gender").list(),
            pl.first("last_name"),
        ]
    )
    .sort("party_count", reverse=True)
    .limit(5)
)

# Execute the lazy query.
df = q.collect()