def test_list_concat_rolling_window() -> None:
    """Build a rolling window by concatenating lagged copies of a column.

    Inspired by:
    https://stackoverflow.com/questions/70377100/use-the-rolling-function-of-polars-to-get-a-list-of-all-values-in-the-rolling-wi

    The first half checks that ``concat_list`` works on plain columns without
    creating a list dtype upfront. Note that the answer given in the link is
    preferred over this snippet, as it reuses the list array when shifting.
    The second half runs the same pipeline on a column that was first
    reshaped into a list column, to exercise the null behavior of
    ``concat_list``.
    """
    frame = pl.DataFrame({"A": [1.0, 2.0, 9.0, 2.0, 13.0]})

    lag_names = [f"A_lag_{i}" for i in range(3)]
    lagged = [pl.col("A").shift(i).alias(name) for i, name in enumerate(lag_names)]
    # Largest lag first, so the unshifted value ends up last in each window.
    window = pl.concat_list(lag_names[::-1]).alias("A_rolling")

    flat_result = frame.with_columns(lagged).select([window])
    assert flat_result.shape == (5, 1)
    assert flat_result.to_series().dtype == pl.List

    # First turn "A" into a list column, then shift the lists to a lag.
    as_lists = frame.with_column(pl.col("A").reshape((-1, 1)))
    list_result = as_lists.with_columns(lagged).select([pl.all(), window])
    assert list_result.shape == (5, 5)
    assert list_result["A_rolling"].dtype == pl.List
def test_list_concat_supertype() -> None:
    """Concatenating a UInt8 and a UInt16 column keeps all values intact."""
    narrow = pl.Series("a", [1, 2], pl.UInt8)
    wide = pl.Series("b", [10000, 20000], pl.UInt16)
    frame = pl.DataFrame([narrow, wide])

    combined = pl.concat_list(pl.col(["a", "b"])).alias("concat_list")
    result = frame.with_column(combined)["concat_list"]

    assert result.to_list() == [[1, 10000], [2, 20000]]
def test_list_eval_dtype_inference() -> None:
    """Check that ``arr.eval`` infers its output dtype correctly.

    The trailing ``arr.first()`` would fail if ``arr.eval`` did not correctly
    infer the output type of the evaluated expression.
    """
    grades = pl.DataFrame(
        {
            "student": ["bas", "laura", "tim", "jenny"],
            "arithmetic": [10, 5, 6, 8],
            "biology": [4, 6, 2, 7],
            "geography": [8, 4, 9, 7],
        }
    )

    element = pl.col("")
    rank_pct = element.rank(reverse=True) / element.count().cast(pl.UInt16)

    # Gather every non-student column into one list column per row.
    with_lists = grades.with_column(
        pl.concat_list(pl.all().exclude("student")).alias("all_grades")
    )
    first_rank = (
        pl.col("all_grades")
        .arr.eval(rank_pct, parallel=True)
        .alias("grades_rank")
        .arr.first()
    )
    observed = with_lists.select([first_rank]).to_series().to_list()

    expected = [
        0.3333333432674408,
        0.6666666865348816,
        0.6666666865348816,
        0.3333333432674408,
    ]
    assert observed == expected
def test_list_concat_nulls() -> None:
    """A null in either input list yields a null in the concatenated row."""
    frame = pl.DataFrame(
        {
            "a": [["a", "b"], None, ["c", "d", "e"], None],
            "t": [["x"], ["y"], None, None],
        }
    )

    concat_expr = pl.concat_list(["a", "t"]).alias("concat")
    concatenated = frame.with_column(concat_expr)["concat"]

    expected = [["a", "b", "x"], None, None, None]
    assert concatenated.to_list() == expected
def test_list_eval_expression() -> None:
    """Run ``arr.eval`` on expressions and Series, with and without parallel."""
    frame = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
    expected_frame = {
        "a": [1, 8, 3],
        "b": [4, 5, 2],
        "rank": [[1.0, 2.0], [2.0, 1.0], [2.0, 1.0]],
    }

    for parallel in (True, False):
        # Expression path: rank the two values inside each concatenated row.
        ranked = (
            pl.concat_list(["a", "b"])
            .arr.eval(pl.first().rank(), parallel=parallel)
            .alias("rank")
        )
        assert frame.with_column(ranked).to_dict(False) == expected_frame

        # Series path: the whole column reshaped into a single list row.
        single_row = frame["a"].reshape((1, -1))
        evaluated = single_row.arr.eval(pl.first(), parallel=parallel)
        assert evaluated.to_list() == [[1, 8, 3]]
def test_list_concat_dispatch() -> None:
    """Every way of spelling a list concat produces the same Series."""
    left = pl.Series("a", [[1, 2]])
    right = pl.Series("b", [[3, 4, 5]])
    expected = pl.Series("a", [[1, 2, 3, 4, 5]])

    # Series namespace: accepts a list of Series as well as a bare Series.
    for other in ([right], right):
        assert left.arr.concat(other).series_equal(expected)

    # Expression forms: the top-level function and the namespace method,
    # the latter with a column name or a list of column names.
    frame = pl.DataFrame([left, right])
    concat_exprs = (
        pl.concat_list(["a", "b"]),
        pl.col("a").arr.concat("b"),
        pl.col("a").arr.concat(["b"]),
    )
    for expr in concat_exprs:
        assert frame.select(expr.alias("a"))["a"].series_equal(expected)