def test_write_json2(df: pl.DataFrame) -> None:
    """Round-trip a frame through JSON, both as a string and via a buffer."""
    # text-based conversion loses time info
    df = df.select(pl.all().exclude(["cat", "time"]))

    # string round-trip: serialize to str, read back from an encoded buffer
    json_str = df.write_json(to_string=True)
    string_buf = io.BytesIO(json_str.encode())
    assert df.frame_equal(pl.read_json(string_buf), null_equal=True)

    # buffer round-trip: write directly into a BytesIO and read it back
    byte_buf = io.BytesIO()
    df.write_json(byte_buf)
    byte_buf.seek(0)
    assert df.frame_equal(pl.read_json(byte_buf), null_equal=True)
def test_from_to_buffer(df: pl.DataFrame, compressions: List[str]) -> None:
    """IPC round-trip through an in-memory buffer for every compression mode."""
    for compression in compressions:
        buffer = io.BytesIO()
        df.write_ipc(buffer, compression=compression)  # type: ignore
        buffer.seek(0)
        assert df.frame_equal(pl.read_ipc(buffer))
def test_to_from_buffer(df: pl.DataFrame) -> None:
    """Round-trip a frame through parquet, csv, ipc and json in-memory buffers.

    Uses the ``write_*`` API for consistency with the other tests in this file
    (the ``to_*`` method names are the older spelling of the same writers).
    """
    df = df.drop("strings_nulls")
    for write_fn, read_fn, text_based in zip(
        [df.write_parquet, df.write_csv, df.write_ipc, df.write_json],
        [
            pl.read_parquet,
            partial(pl.read_csv, parse_dates=True),
            pl.read_ipc,
            pl.read_json,
        ],
        [False, True, False, True],
    ):
        f = io.BytesIO()
        write_fn(f)  # type: ignore
        f.seek(0)
        df_1 = read_fn(f)  # type: ignore
        # text formats (csv/json) lose Categorical/Time type information;
        # restore it before comparing against the original frame
        if text_based:
            df_1 = df_1.with_columns(
                [pl.col("cat").cast(pl.Categorical), pl.col("time").cast(pl.Time)]
            )
        assert df.frame_equal(df_1)
def test_replace() -> None:
    """replace() swaps column "a" for the given series, taking the series' name.

    Annotated ``-> None`` for consistency with the other tests in this file.
    """
    df = DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
    s = Series("c", [True, False, True])
    # replace mutates df in place; the result carries the new column name "c"
    df.replace("a", s)
    expected = DataFrame({"c": [True, False, True], "b": [1, 2, 3]})
    assert df.frame_equal(expected)
def test_from_to_file(io_test_dir: str, example_df: pl.DataFrame, compressions: List[str]) -> None:
    """Avro round-trip through a file on disk for every compression mode."""
    f = os.path.join(io_test_dir, "small.avro")
    for compression in compressions:
        example_df.write_avro(f, compression=compression)  # type: ignore
        # f is already a str (os.path.join returns str) — no str() wrapper needed
        df_read = pl.read_avro(f)
        assert example_df.frame_equal(df_read)
def test_from_to_buffer(example_df: pl.DataFrame, compressions: list[str]) -> None:
    """Avro round-trip through an in-memory buffer for every compression mode."""
    for compression in compressions:
        buffer = io.BytesIO()
        example_df.write_avro(buffer, compression=compression)  # type: ignore[arg-type]
        buffer.seek(0)
        assert example_df.frame_equal(pl.read_avro(buffer))
def test_to_from_buffer(df: pl.DataFrame) -> None:
    """JSON round-trip through both a binary and a text in-memory buffer."""
    for buf in (io.BytesIO(), io.StringIO()):
        df.write_json(buf)
        buf.seek(0)
        result = pl.read_json(buf)
        # JSON loses Categorical/Time type information; restore before comparing
        result = result.with_columns(
            [pl.col("cat").cast(pl.Categorical), pl.col("time").cast(pl.Time)]
        )
        assert df.frame_equal(result)
def test_from_to_file(io_test_dir: str, df: pl.DataFrame, compressions: List[str]) -> None:
    """IPC round-trip through a file on disk for every compression mode."""
    f = os.path.join(io_test_dir, "small.ipc")
    # does not yet work on windows because we hold an mmap?
    if os.name != "nt":
        for compression in compressions:
            df.write_ipc(f, compression=compression)  # type: ignore
            # f is already a str (os.path.join returns str) — no str() wrapper needed
            df_read = pl.read_ipc(f)
            assert df.frame_equal(df_read)
def test_to_from_buffer(df: pl.DataFrame) -> None:
    """CSV round-trip through an in-memory buffer."""
    buf = io.BytesIO()
    df.write_csv(buf)
    buf.seek(0)
    result = pl.read_csv(buf, parse_dates=True)
    # CSV loses Categorical/Time type information; restore before comparing
    result = result.with_columns(
        [pl.col("cat").cast(pl.Categorical), pl.col("time").cast(pl.Time)]
    )
    assert df.frame_equal(result)
def test_to_from_file(io_test_dir: str, df: pl.DataFrame) -> None:
    """CSV round-trip through a file on disk."""
    df = df.drop("strings_nulls")
    path = os.path.join(io_test_dir, "small.csv")
    df.write_csv(path)
    result = pl.read_csv(path, parse_dates=True)
    # CSV loses Categorical/Time type information; restore before comparing
    result = result.with_columns(
        [pl.col("cat").cast(pl.Categorical), pl.col("time").cast(pl.Time)]
    )
    assert df.frame_equal(result)
def test_to_from_file(io_test_dir: str, df: pl.DataFrame, compressions: List[str]) -> None:
    """Parquet round-trip via a file; lzo is expected to fail on both engines."""
    f = os.path.join(io_test_dir, "small.parquet")
    for compression in compressions:
        if compression != "lzo":
            df.write_parquet(f, compression=compression)
            read_df = pl.read_parquet(f)
            assert df.frame_equal(read_df)
        else:
            # lzo compression is not supported now
            with pytest.raises(pl.ArrowError):
                df.write_parquet(f, compression=compression)
                _ = pl.read_parquet(f)
            with pytest.raises(OSError):
                df.write_parquet(f, compression=compression, use_pyarrow=True)
                _ = pl.read_parquet(f)
def test_to_from_buffer(df: pl.DataFrame, compressions: List[str]) -> None:
    """Parquet round-trip via a buffer; lzo is expected to fail on both engines."""
    for compression in compressions:
        if compression != "lzo":
            buf = io.BytesIO()
            df.write_parquet(buf, compression=compression)
            buf.seek(0)
            read_df = pl.read_parquet(buf)
            assert df.frame_equal(read_df)
        else:
            # lzo compression is not supported now
            with pytest.raises(pl.ArrowError):
                buf = io.BytesIO()
                df.write_parquet(buf, compression=compression)
                buf.seek(0)
                _ = pl.read_parquet(buf)
            with pytest.raises(OSError):
                buf = io.BytesIO()
                df.write_parquet(buf, compression=compression, use_pyarrow=True)
                buf.seek(0)
                _ = pl.read_parquet(buf)
def test_slice() -> None:
    """slice(offset, length) returns the requested row window.

    Annotated ``-> None`` for consistency with the other tests in this file.
    """
    df = DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"]})
    # take 2 rows starting at row index 1
    df = df.slice(1, 2)
    expected = DataFrame({"a": [1, 3], "b": ["b", "c"]})
    assert df.frame_equal(expected)
def test_sort() -> None:
    """sort(..., in_place=True) mutates the frame, reordering all columns by "a".

    Annotated ``-> None`` for consistency with the other tests in this file.
    """
    df = DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
    # in-place sort: df itself is reordered, no new frame is returned/used
    df.sort("a", in_place=True)
    assert df.frame_equal(DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}))