Example #1
0
def test_lazy_concat(df: pl.DataFrame) -> None:
    """Concatenate a lazy frame with itself and compare to an eager vstack.

    The collected result must have twice the rows and the same number of
    columns as ``df``, and must equal ``df`` stacked on a clone of itself
    (nulls compared as equal).
    """
    n_rows, n_cols = df.shape
    expected_shape = (n_rows * 2, n_cols)

    lazy_frames = [df.lazy(), df.lazy()]
    result = pl.concat(lazy_frames).collect()

    assert result.shape == expected_shape
    stacked = df.vstack(df.clone())
    assert result.frame_equal(stacked, null_equal=True)
Example #2
0
def test_lazy():
    """Check basic lazy ``with_column``/``select`` and a ``when/then/otherwise``.

    The test used to only print one result and bind the other to an unused
    local, so it could not fail on wrong output; both results are now
    asserted.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    # Add a literal column, then keep only "a" and the new column.
    ldf = df.lazy().with_column(lit(1).alias("foo")).select(
        [col("a"), col("foo")])
    selected = ldf.collect()
    assert selected.columns == ["a", "foo"]
    assert selected.shape == (3, 2)

    # Only the row where a > 2 (the last one) takes the `then` branch.
    new = (df.lazy().with_column(
        when(col("a").gt(lit(2))).then(lit(10)).otherwise(
            lit(1)).alias("new")).collect())
    assert new["new"].to_list() == [1, 1, 10]
Example #3
0
def test_quantile(fruits_cars: pl.DataFrame) -> None:
    """Check every quantile interpolation mode on column ``A``.

    Each mode is exercised twice: through ``LazyFrame.quantile`` and through
    the ``pl.col("A").quantile`` expression; both paths must agree.
    """
    assert fruits_cars.lazy().quantile(0.25, "nearest").collect()["A"][0] == 2
    assert fruits_cars.select(pl.col("A").quantile(0.25, "nearest"))["A"][0] == 2

    assert fruits_cars.lazy().quantile(0.24, "lower").collect()["A"][0] == 1
    assert fruits_cars.select(pl.col("A").quantile(0.24, "lower"))["A"][0] == 1

    assert fruits_cars.lazy().quantile(0.26, "higher").collect()["A"][0] == 3
    assert fruits_cars.select(pl.col("A").quantile(0.26, "higher"))["A"][0] == 3

    assert fruits_cars.lazy().quantile(0.24, "midpoint").collect()["A"][0] == 1.5
    assert fruits_cars.select(pl.col("A").quantile(0.24, "midpoint"))["A"][0] == 1.5

    # The "linear" result comes out of floating-point interpolation, so it is
    # compared with a tolerance instead of exact equality.
    linear_expected = pytest.approx(1.96)
    assert fruits_cars.lazy().quantile(0.24, "linear").collect()["A"][0] == linear_expected
    assert fruits_cars.select(pl.col("A").quantile(0.24, "linear"))["A"][0] == linear_expected
Example #4
0
def test_last(fruits_cars: pl.DataFrame) -> None:
    """The lazy ``last`` must equal the final row of the eager frame."""
    final_row_start = len(fruits_cars) - 1
    expected = fruits_cars[final_row_start:, :]

    result = fruits_cars.lazy().last().collect()
    assert result.frame_equal(expected)
Example #5
0
def test_binary_function():
    """``map_binary`` with an addition lambda must match eager ``a + b``."""
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    out = (
        df.lazy()
        .with_column(map_binary(col("a"), col("b"), lambda a, b: a + b))
        .collect()
    )
    # Comparing two Series with `==` is element-wise, so asserting on that
    # result does not verify that every element matches. Use an explicit
    # whole-series equality check instead.
    expected = out["a"] + out["b"]
    assert out["binary_function"].series_equal(expected)
Example #6
0
def test_set_null():
    """A ``when/then(lit(None))/otherwise`` expression yields nulls where a > 1."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    null_or_default = (
        when(col("a") > 1).then(lit(None)).otherwise(100).alias("foo")
    )
    result = frame.lazy().with_column(null_or_default).collect()

    foo = result["foo"]
    # Row 0 has a == 1, so it keeps the `otherwise` value.
    assert foo[0] == 100
    # Rows 1 and 2 have a > 1, so they become null.
    assert foo[1] is None
    assert foo[2] is None
Example #7
0
def test_shift(fruits_cars: pl.DataFrame) -> None:
    """Check ``shift`` as an expression and on a whole lazy frame.

    Covers a positive shift of a single column, a positive shift of every
    column of ``fruits_cars``, and a negative shift whose trailing rows must
    become null.
    """
    df = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]})
    out = df.select(col("a").shift(1))
    assert out["a"].series_equal(pl.Series("a", [None, 1, 2, 3, 4]),
                                 null_equal=True)

    res = fruits_cars.lazy().shift(2).collect()

    expected = pl.DataFrame({
        "A": [None, None, 1, 2, 3],
        "fruits": [None, None, "banana", "banana", "apple"],
        "B": [None, None, 5, 4, 3],
        "cars": [None, None, "beetle", "audi", "beetle"],
    })
    # The comparison result used to be discarded (no `assert`), so a wrong
    # shift could never fail the test.
    assert res.frame_equal(expected, null_equal=True)

    # negative value: the last two rows of all four columns must be null
    res = fruits_cars.lazy().shift(-2).collect()
    for rows in [3, 4]:
        for cols in range(4):
            assert res[rows, cols] is None
Example #8
0
def test_join():
    """Exercise eager joins (no ``how``, left, outer) and a lazy join.

    Also runs a multi-column join and checks that a lazy join produces the
    same shape as its eager counterpart.
    """
    left = DataFrame({
        "a": ["a", "b", "a", "z"],
        "b": [1, 2, 3, 4],
        "c": [6, 5, 4, 3],
    })
    right = DataFrame({
        "a": ["b", "c", "b", "a"],
        "k": [0, 3, 9, 6],
        "c": [1, 0, 2, 1],
    })

    # No `how` argument given.
    plain = left.join(right, left_on="a", right_on="a").sort("a")
    assert plain["b"].series_equal(Series("", [1, 3, 2, 2]))

    # Left join: the unmatched left row shows up with a null right-hand "c".
    left_joined = left.join(
        right, left_on="a", right_on="a", how="left"
    ).sort("a")
    assert left_joined["c_right"].is_null().sum() == 1
    assert left_joined["b"].series_equal(Series("", [1, 3, 2, 2, 4]))

    # Outer join: nulls appear on both sides.
    outer_joined = left.join(
        right, left_on="a", right_on="a", how="outer"
    ).sort("a")
    assert outer_joined["c_right"].null_count() == 1
    assert outer_joined["c"].null_count() == 2
    assert outer_joined["b"].null_count() == 2

    frame_a = DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})
    frame_b = DataFrame({
        "foo": [1, 1, 1],
        "bar": ["a", "c", "c"],
        "ham": ["let", "var", "const"],
    })

    # just check if join on multiple columns runs
    frame_a.join(frame_b, left_on=["a", "b"], right_on=["foo", "bar"])

    eager_result = frame_a.join(frame_b, left_on="a", right_on="foo")
    lazy_result = (
        frame_a.lazy()
        .join(frame_b.lazy(), left_on="a", right_on="foo")
        .collect()
    )
    assert lazy_result.shape == eager_result.shape
Example #9
0
def test_custom_groupby():
    """Apply custom Python functions in eager and lazy groupby contexts.

    Only the shapes of the results are checked.
    """
    frame = DataFrame({"A": ["a", "a", "c", "c"], "B": [1, 3, 5, 2]})

    summed = frame.groupby("A").select("B").apply(lambda grp: grp.sum())
    assert summed.shape == (2, 2)

    rebuilt = frame.groupby("A").select("B").apply(
        lambda grp: Series("", np.array(grp))
    )
    assert rebuilt.shape == (2, 2)

    frame = DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})

    summed_a = col("a").apply(lambda grp: grp.sum(), dtype_out=int)
    result = frame.lazy().groupby("b").agg([summed_a]).collect()
    assert result.shape == (3, 2)
Example #10
0
def test_std(fruits_cars: pl.DataFrame) -> None:
    """The lazy ``std`` of column ``A`` must be approximately 1.5811."""
    std_frame = fruits_cars.lazy().std().collect()
    observed = std_frame["A"][0]
    assert observed == pytest.approx(1.5811388300841898)
Example #11
0
def test_first(fruits_cars: pl.DataFrame) -> None:
    """The lazy ``first`` must equal row 0 of the eager frame."""
    first_row = fruits_cars.lazy().first().collect()
    expected = fruits_cars[0, :]
    assert first_row.frame_equal(expected)
Example #12
0
def test_groupby_apply():
    """A lazy groupby-apply with an identity function returns the input rows.

    The collected result is sorted by "b" before being compared with the
    original frame.
    """
    frame = DataFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 3.0]})

    def _identity(group):
        return group

    lazy_result = frame.lazy().groupby("a").apply(_identity)
    collected = lazy_result.collect().sort("b")
    assert collected.frame_equal(frame)
Example #13
0
def test_var(fruits_cars: pl.DataFrame) -> None:
    """The lazy ``var`` of column ``A`` must be approximately 2.5."""
    var_frame = fruits_cars.lazy().var().collect()
    observed = var_frame["A"][0]
    assert observed == pytest.approx(2.5)
Example #14
0
def test_fold():
    """``pl.sum`` over a list of column names produces a "sum" column."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    row_sums = frame.lazy().select(pl.sum(["a", "b"])).collect()

    expected = Series("sum", [2, 4, 6])
    assert row_sums["sum"].series_equal(expected)
Example #15
0
def test_or():
    """Filtering with two predicates combined by ``|`` keeps two rows."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})

    either = (pl.col("a") == 1) | (pl.col("b") > 2)
    kept = frame.lazy().filter(either).collect()

    n_rows = kept.shape[0]
    assert n_rows == 2
Example #16
0
def test_apply():
    """``Expr.map`` with a doubling lambda must produce ``a * 2`` as "foo".

    The test used to only run the query and bind the result to an unused
    local; the output is now checked.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    new = df.lazy().with_column(
        col("a").map(lambda s: s * 2).alias("foo")).collect()

    assert new.columns == ["a", "b", "foo"]
    # Compare against the same arithmetic done eagerly on the source column.
    assert new["foo"].series_equal(df["a"] * 2)
Example #17
0
def test_agg():
    """A lazy ``min`` over a two-column frame collects to a single row."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    minima = frame.lazy().min().collect()
    assert minima.shape == (1, 2)
Example #18
0
def test_add_eager_column():
    """An eager Series wrapped in ``pl.lit`` can be added to a lazy frame."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    eager_column = pl.Series("c", [1, 2, 3])

    result = frame.lazy().with_column(pl.lit(eager_column)).collect()
    total = result["c"].sum()
    assert total == 6
Example #19
0
def test_median(fruits_cars: pl.DataFrame) -> None:
    """The median of column ``A`` is 3 via both the lazy frame and the expression."""
    lazy_median = fruits_cars.lazy().median().collect()
    assert lazy_median["A"][0] == 3

    expr_median = fruits_cars.select(pl.col("A").median())
    assert expr_median["A"][0] == 3
Example #20
0
def test_collect_all(df: pl.DataFrame, no_optimization: bool) -> None:
    """``pl.collect_all`` returns one result per lazy query, in input order.

    The first query sums the "int" column; the second sums the doubled
    "floats" column.
    """
    int_sum_query = df.lazy().select(pl.col("int").sum())
    doubled_float_sum_query = df.lazy().select((pl.col("floats") * 2).sum())

    queries = [int_sum_query, doubled_float_sum_query]
    results = pl.collect_all(queries, no_optimization=no_optimization)

    assert results[0][0, 0] == 6
    assert results[1][0, 0] == 12.0
Example #21
0
def test_fetch(fruits_cars: pl.DataFrame) -> None:
    """``fetch(2)`` on a plain ``select("*")`` returns the first two rows."""
    res = fruits_cars.lazy().select("*").fetch(2)
    # Compare against the source frame. The previous check compared `res`
    # with `res[:2]`, i.e. with a slice of itself, which only bounds the row
    # count and never looks at the fetched values.
    assert res.frame_equal(fruits_cars[:2, :])
Example #22
0
def test_tail(fruits_cars: pl.DataFrame) -> None:
    """The lazy ``tail(2)`` must equal the eager slice starting at row 3."""
    last_two = fruits_cars.lazy().tail(2).collect()
    expected = fruits_cars[3:, :]
    assert last_two.frame_equal(expected)
Example #23
0
def test_head(fruits_cars: pl.DataFrame) -> None:
    """The lazy ``head(2)`` must equal the first two rows of the eager frame."""
    first_two = fruits_cars.lazy().head(2).collect()
    expected = fruits_cars[:2, :]
    assert first_two.frame_equal(expected)
Example #24
0
def test_with_column_renamed(fruits_cars: pl.DataFrame) -> None:
    """Renaming "A" to "C" on a lazy frame makes "C" the first column."""
    renamed = fruits_cars.lazy().rename({"A": "C"}).collect()
    first_column = renamed.columns[0]
    assert first_column == "C"