Example #1
def test_write_json2(df: pl.DataFrame) -> None:
    # text-based conversion loses categorical and time info
    df = df.select(pl.all().exclude(["cat", "time"]))
    s = df.write_json(to_string=True)
    f = io.BytesIO()
    f.write(s.encode())
    f.seek(0)
    out = pl.read_json(f)
    assert df.frame_equal(out, null_equal=True)

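    # same round-trip again, but writing directly to a buffer instead of via a string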
    file = io.BytesIO()
    df.write_json(file)
    file.seek(0)
    out = pl.read_json(file)
    assert df.frame_equal(out, null_equal=True)
Example #2
def test_from_to_buffer(df: pl.DataFrame, compressions: List[str]) -> None:
    for compression in compressions:
        buf = io.BytesIO()
        df.write_ipc(buf, compression=compression)  # type: ignore
        buf.seek(0)
        read_df = pl.read_ipc(buf)
        assert df.frame_equal(read_df)
Example #3
def test_to_from_buffer(df: pl.DataFrame) -> None:
    df = df.drop("strings_nulls")

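    # round-trip the frame through each format; text_based marks formats that lose dtype information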
    for to_fn, from_fn, text_based in zip(
        [df.to_parquet, df.to_csv, df.to_ipc, df.to_json],
        [
            pl.read_parquet,
            partial(pl.read_csv, parse_dates=True),
            pl.read_ipc,
            pl.read_json,
        ],
        [False, True, False, True],
    ):
        f = io.BytesIO()
        to_fn(f)  # type: ignore
        f.seek(0)

        df_1 = from_fn(f)  # type: ignore
        # some type information is lost due to text conversion
        if text_based:
            df_1 = df_1.with_columns([
                pl.col("cat").cast(pl.Categorical),
                pl.col("time").cast(pl.Time)
            ])
        assert df.frame_equal(df_1)
Example #4
def test_replace():
    df = DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
    s = Series("c", [True, False, True])
    df.replace("a", s)
    assert df.frame_equal(DataFrame({
        "c": [True, False, True],
        "b": [1, 2, 3]
    }))
Example #5
def test_from_to_file(io_test_dir: str, example_df: pl.DataFrame,
                      compressions: List[str]) -> None:
    f = os.path.join(io_test_dir, "small.avro")

    for compression in compressions:
        example_df.write_avro(f, compression=compression)  # type: ignore
        df_read = pl.read_avro(str(f))
        assert example_df.frame_equal(df_read)
Example #6
def test_from_to_buffer(example_df: pl.DataFrame,
                        compressions: list[str]) -> None:
    for compression in compressions:
        buf = io.BytesIO()
        example_df.write_avro(
            buf, compression=compression)  # type: ignore[arg-type]
        buf.seek(0)
        read_df = pl.read_avro(buf)
        assert example_df.frame_equal(read_df)
Example #7
def test_to_from_buffer(df: pl.DataFrame) -> None:
    for buf in (io.BytesIO(), io.StringIO()):
        df.write_json(buf)
        buf.seek(0)
        read_df = pl.read_json(buf)
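        # json round-trip drops the Categorical and Time dtypes, so cast them back before comparing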
        read_df = read_df.with_columns(
            [pl.col("cat").cast(pl.Categorical), pl.col("time").cast(pl.Time)]
        )
        assert df.frame_equal(read_df)
Example #8
def test_from_to_file(io_test_dir: str, df: pl.DataFrame,
                      compressions: List[str]) -> None:
    f = os.path.join(io_test_dir, "small.ipc")

    # does not yet work on Windows because we hold an mmap?
    if os.name != "nt":
        for compression in compressions:
            df.write_ipc(f, compression=compression)  # type: ignore
            df_read = pl.read_ipc(str(f))
            assert df.frame_equal(df_read)
Example #9
def test_to_from_buffer(df: pl.DataFrame) -> None:
    buf = io.BytesIO()
    df.write_csv(buf)
    buf.seek(0)

    read_df = pl.read_csv(buf, parse_dates=True)

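    # csv is text-based, so restore the Categorical and Time dtypes before comparing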
    read_df = read_df.with_columns(
        [pl.col("cat").cast(pl.Categorical), pl.col("time").cast(pl.Time)]
    )
    assert df.frame_equal(read_df)
Example #10
def test_to_from_file(io_test_dir: str, df: pl.DataFrame) -> None:
    df = df.drop("strings_nulls")

    f = os.path.join(io_test_dir, "small.csv")
    df.write_csv(f)

    read_df = pl.read_csv(f, parse_dates=True)

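    # csv round-trip loses the Categorical and Time dtypes; cast them back before comparing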
    read_df = read_df.with_columns(
        [pl.col("cat").cast(pl.Categorical), pl.col("time").cast(pl.Time)]
    )
    assert df.frame_equal(read_df)
Example #11
def test_to_from_file(io_test_dir: str, df: pl.DataFrame,
                      compressions: List[str]) -> None:
    f = os.path.join(io_test_dir, "small.parquet")
    for compression in compressions:
        if compression == "lzo":
            # lzo compression is not yet supported
            with pytest.raises(pl.ArrowError):
                df.write_parquet(f, compression=compression)
                _ = pl.read_parquet(f)

            with pytest.raises(OSError):
                df.write_parquet(f, compression=compression, use_pyarrow=True)
                _ = pl.read_parquet(f)
        else:
            df.write_parquet(f, compression=compression)
            read_df = pl.read_parquet(f)
            assert df.frame_equal(read_df)
Example #12
def test_to_from_buffer(df: pl.DataFrame, compressions: List[str]) -> None:
    for compression in compressions:
        if compression == "lzo":
            # lzo compression is not yet supported
            with pytest.raises(pl.ArrowError):
                buf = io.BytesIO()
                df.write_parquet(buf, compression=compression)
                buf.seek(0)
                _ = pl.read_parquet(buf)

            with pytest.raises(OSError):
                buf = io.BytesIO()
                df.write_parquet(buf, compression=compression, use_pyarrow=True)
                buf.seek(0)
                _ = pl.read_parquet(buf)
        else:
            buf = io.BytesIO()
            df.write_parquet(buf, compression=compression)
            buf.seek(0)
            read_df = pl.read_parquet(buf)
            assert df.frame_equal(read_df)
Example #13
def test_slice():
    df = DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"]})
    df = df.slice(1, 2)
    assert df.frame_equal(DataFrame({"a": [1, 3], "b": ["b", "c"]}))
Example #14
def test_sort():
    df = DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
    df.sort("a", in_place=True)
    assert df.frame_equal(DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}))
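
The snippets above reference pytest fixtures (df, example_df, io_test_dir, compressions) that are defined elsewhere in their test suites. Below is a minimal sketch of what such fixtures could look like; the column set, values, and compression list are illustrative assumptions, not the actual polars fixtures.

from datetime import time
from typing import List

import polars as pl
import pytest


@pytest.fixture
def df() -> pl.DataFrame:
    # small frame covering the dtypes the round-trip tests cast back after reading
    return pl.DataFrame(
        {
            "ints": [1, 2, 3],
            "strings_nulls": ["a", None, "c"],
            "cat": ["x", "y", "x"],
            "time": [time(9, 0), time(12, 30), time(23, 59)],
        }
    ).with_columns([pl.col("cat").cast(pl.Categorical)])


@pytest.fixture
def example_df(df: pl.DataFrame) -> pl.DataFrame:
    # the avro tests use their own fixture; reusing df minus the time column is an assumption
    return df.drop("time")


@pytest.fixture
def io_test_dir(tmp_path) -> str:
    # scratch directory for the file-based round-trip tests (tmp_path is pytest's built-in fixture)
    return str(tmp_path)


@pytest.fixture
def compressions() -> List[str]:
    # placeholder codec list; each format accepts its own set of codecs in practice
    return ["uncompressed"]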