def test_null_count(df: pl.DataFrame) -> None:
    """Check ``df.null_count()`` against a plain-Python tally of ``None`` values.

    A frame without columns must produce an empty ``(0, 0)`` result. Otherwise
    the result must be a single row holding one count per column of ``df``,
    and each count must equal the number of ``None`` entries in that column.
    """
    # note: the zero-row and zero-col cases are always passed as explicit examples
    null_count, ncols = df.null_count(), len(df.columns)
    if ncols == 0:
        assert null_count.shape == (0, 0)
    else:
        assert null_count.shape == (1, ncols)
        # Cross-check every reported count by materialising the column as a
        # Python list and counting its ``None`` entries directly.
        for idx, count in enumerate(null_count.rows()[0]):
            assert count == sum(v is None for v in df.select_at_idx(idx).to_list())
def test_strategy_null_probability(
    s: pl.Series,
    df1: pl.DataFrame,
    df2: pl.DataFrame,
    df3: pl.DataFrame,
) -> None:
    """Check the relative null counts of four generated objects.

    Every object must have length 50. The null totals must strictly increase
    from ``s`` through ``df1``, ``df2`` and ``df3``. In ``df2`` the first
    column must hold more nulls than the second while not being entirely
    null; in ``df3`` the first column must hold more nulls than the second
    and be entirely null.
    """
    # NOTE(review): the inputs are presumably produced by strategies configured
    # with increasing null probabilities; the decorators are not visible here.
    expected_len = 50

    def total_nulls(frame):
        # Sum the per-column null counts of a frame into one scalar.
        return frame.null_count().fold(sum).sum()

    for generated in (s, df1, df2, df3):
        assert len(generated) == expected_len  # type: ignore[arg-type]

    # Null totals must be strictly ordered: s < df1 < df2 < df3.
    assert s.null_count() < total_nulls(df1)
    assert total_nulls(df1) < total_nulls(df2)
    assert total_nulls(df2) < total_nulls(df3)

    # The unpacking below requires the null-count row to have exactly two entries.
    df2_first, df2_second = df2.null_count().rows()[0]
    assert df2_first > df2_second
    assert df2_first < expected_len

    df3_first, df3_second = df3.null_count().rows()[0]
    assert df3_first > df3_second
    assert df3_first == expected_len
def test_null_count():
    """``null_count`` on a two-column frame yields one row with an entry per column."""
    # NOTE(review): an earlier function in this file is also named
    # ``test_null_count``; if both really live in the same module this later
    # definition shadows the earlier one -- confirm and rename if so.
    columns = {"a": [2, 1, 3], "b": ["a", "b", None]}
    counts = DataFrame(columns).null_count()
    expected_shape = (1, len(columns))
    assert counts.shape == expected_shape