Example #1
0
def test_head_tail(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.head`` and ``pl.tail`` on column "A" in both calling forms.

    Each function is called once with a column name inside ``select`` and once
    directly with the Series; both results must equal the expected 2-element
    Series ([1, 2] for head, [4, 5] for tail).
    """
    cases = (
        (pl.head, [1, 2]),
        (pl.tail, [4, 5]),
    )
    for take, values in cases:
        # Expression form: the function receives the column name.
        via_expr = fruits_cars.select([take("A", 2)])
        # Eager form: the function receives the Series itself.
        via_series = take(fruits_cars["A"], 2)
        wanted = pl.Series("A", values)
        assert via_expr.to_series(0).series_equal(wanted)
        assert via_series.series_equal(wanted)
Example #2
0
def test_quantile(fruits_cars: pl.DataFrame) -> None:
    """Check quantiles of column "A" for every interpolation strategy.

    Each strategy is exercised twice: through ``LazyFrame.quantile`` followed by
    ``collect`` and through the ``pl.col("A").quantile`` expression in
    ``select``. Both paths must agree on the expected value.
    """
    import math

    col_a = pl.col("A")

    assert fruits_cars.lazy().quantile(0.25, "nearest").collect()["A"][0] == 2
    assert fruits_cars.select(col_a.quantile(0.25, "nearest"))["A"][0] == 2

    assert fruits_cars.lazy().quantile(0.24, "lower").collect()["A"][0] == 1
    assert fruits_cars.select(col_a.quantile(0.24, "lower"))["A"][0] == 1

    assert fruits_cars.lazy().quantile(0.26, "higher").collect()["A"][0] == 3
    assert fruits_cars.select(col_a.quantile(0.26, "higher"))["A"][0] == 3

    # 1.5 is exactly representable in binary floating point, so == is safe here.
    assert fruits_cars.lazy().quantile(0.24, "midpoint").collect()["A"][0] == 1.5
    assert fruits_cars.select(col_a.quantile(0.24, "midpoint"))["A"][0] == 1.5

    # 1.96 is an interpolated value that has no exact binary representation;
    # compare with a tolerance so the test does not hinge on rounding details.
    lazy_linear = fruits_cars.lazy().quantile(0.24, "linear").collect()["A"][0]
    eager_linear = fruits_cars.select(col_a.quantile(0.24, "linear"))["A"][0]
    assert math.isclose(lazy_linear, 1.96)
    assert math.isclose(eager_linear, 1.96)
Example #3
0
def test_is_between(fruits_cars: pl.DataFrame) -> None:
    """Check ``is_between(2, 4, ...)`` on column "A" for each bounds setting.

    Every case lists the extra positional arguments passed after ``2, 4`` and
    the boolean mask expected in the resulting "is_between" column. Judging by
    the expected masks, omitting the argument behaves like ``False`` (both ends
    excluded), ``True`` includes both ends, and a two-element list controls the
    lower and upper end separately.
    """
    neither_end = [False, False, True, False, False]
    both_ends = [False, True, True, True, False]
    cases = [
        ((), neither_end),
        ((False,), neither_end),
        (([False, False],), neither_end),
        ((True,), both_ends),
        (([True, True],), both_ends),
        (([False, True],), [False, False, True, True, False]),
        (([True, False],), [False, True, True, False, False]),
    ]
    for extra_args, mask in cases:
        selected = fruits_cars.select(
            pl.col("A").is_between(2, 4, *extra_args)  # type: ignore[arg-type]
        )
        wanted = pl.Series("is_between", mask)
        assert selected["is_between"].series_equal(wanted)
Example #4
0
def test_write_json2(df: pl.DataFrame) -> None:
    """Round-trip a frame through JSON, via a string and via a binary buffer.

    In both cases the frame read back with ``pl.read_json`` must equal the
    frame that was written, treating nulls as equal.
    """
    # text-based conversion loses time info
    trimmed = df.select(pl.all().exclude(["cat", "time"]))

    # Path 1: serialise to a string and read it back from an in-memory buffer.
    json_text = trimmed.write_json(to_string=True)
    from_text = pl.read_json(io.BytesIO(json_text.encode()))
    assert trimmed.frame_equal(from_text, null_equal=True)

    # Path 2: write directly into a buffer, rewind, and read it back.
    buffer = io.BytesIO()
    trimmed.write_json(buffer)
    buffer.seek(0)
    from_buffer = pl.read_json(buffer)
    assert trimmed.frame_equal(from_buffer, null_equal=True)
Example #5
0
def df_no_lists(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` with the list-typed columns removed.

    The columns dropped are "list_str", "list_int", "list_bool" and
    "list_flt"; every other column is selected unchanged.
    """
    # Each column is named once; the original listed "list_int" twice.
    list_columns = ["list_str", "list_int", "list_bool", "list_flt"]
    return df.select(pl.all().exclude(list_columns))
Example #6
0
def test_cov(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.cov`` of columns "A" and "B" given as names and as expressions.

    Both calling forms must produce -2.5 in the single resulting cell.
    """
    # Columns referenced by name.
    by_name = fruits_cars.select(pl.cov("A", "B"))
    assert by_name[0, 0] == -2.5

    # Columns referenced by explicit column expressions.
    by_expr = fruits_cars.select(pl.cov(pl.col("A"), pl.col("B")))
    assert by_expr[0, 0] == -2.5
Example #7
0
def test_any_expr(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.any`` on a single boolean column and on a list of columns.

    Column "A" is cast to bool before the single-column check; the
    multi-column check is run on the unmodified frame.
    """
    with_bool_a = fruits_cars.with_column(pl.col("A").cast(bool))
    single_column = with_bool_a.select(pl.any("A"))
    assert single_column[0, 0]

    multi_column = fruits_cars.select(pl.any([pl.col("A"), pl.col("B")]))
    assert multi_column[0, 0]
Example #8
0
def test_lower_bound_upper_bound(fruits_cars: pl.DataFrame) -> None:
    """Check that the bounds reported for column "A" are far from zero.

    ``lower_bound`` must be below -10,000,000 and ``upper_bound`` above
    10,000,000.
    """
    lowest = fruits_cars.select(pl.col("A").lower_bound())["A"][0]
    assert lowest < -10_000_000

    highest = fruits_cars.select(pl.col("A").upper_bound())["A"][0]
    assert highest > 10_000_000
Example #9
0
def test_max_min_multiple_columns(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.max`` / ``pl.min`` when given the column list ["A", "B"].

    The results are aliased to "max" and "min" and compared against the
    expected five-element Series.
    """
    expected_max = pl.Series("max", [5, 4, 3, 4, 5])
    maxima = fruits_cars.select(pl.max(["A", "B"]).alias("max"))
    assert maxima.to_series(0).series_equal(expected_max)

    expected_min = pl.Series("min", [1, 2, 3, 2, 1])
    minima = fruits_cars.select(pl.min(["A", "B"]).alias("min"))
    assert minima.to_series(0).series_equal(expected_min)
Example #10
0
def test_median(fruits_cars: pl.DataFrame) -> None:
    """Check that the median of column "A" is 3 on the lazy and eager paths."""
    # Lazy path: median over the whole frame, then collect.
    lazy_median = fruits_cars.lazy().median().collect()
    assert lazy_median["A"][0] == 3

    # Eager path: median expression on the single column.
    eager_median = fruits_cars.select(pl.col("A").median())
    assert eager_median["A"][0] == 3
Example #11
0
def test_any_expr(fruits_cars: pl.DataFrame) -> None:
    """Check ``pl.any`` given a column name and given a list of expressions.

    In both cases the single resulting cell must be truthy.
    """
    by_name = fruits_cars.select(pl.any("A"))
    assert by_name[0, 0]

    columns = [pl.col("A"), pl.col("B")]
    by_exprs = fruits_cars.select(pl.any(columns))
    assert by_exprs[0, 0]