Beispiel #1
0
def test_pandas_from_arrow():
    """Check ``fr.pandas_from_arrow`` on a bare arrow array and a record batch.

    An arrow string array is expected to equal a ``pd.Series`` wrapping a
    ``fr.FletcherArray``; a single-column record batch is expected to equal
    a ``pd.DataFrame`` whose column wraps the same ``fr.FletcherArray``.
    """
    strings = pa.array(["a", "b", "c"], pa.string())

    # Bare array -> Series without a name.
    wanted_series = pd.Series(fr.FletcherArray(strings))
    got_series = fr.pandas_from_arrow(strings)
    pdt.assert_series_equal(wanted_series, got_series)

    # Record batch -> DataFrame with one column per batch field.
    batch = pa.RecordBatch.from_arrays([strings], ["column"])
    wanted_frame = pd.DataFrame({"column": fr.FletcherArray(strings)})
    got_frame = fr.pandas_from_arrow(batch)
    pdt.assert_frame_equal(wanted_frame, got_frame)
Beispiel #2
0
def test_pandas_from_arrow_casting_to_pandas(arr):
    """Check which columns ``fr.pandas_from_arrow`` keeps as ``FletcherArray``.

    ``arr`` is indexed by column name and must provide an arrow array for
    each of the seven names below (presumably a fixture defined elsewhere).

    For every column of the converted frame the test asserts:

    * ``int_with_nulls``, ``list`` and ``large_list`` are backed by
      ``fr.FletcherArray``; every other column is a ``np.ndarray`` or a
      pandas ``Categorical``.
    * ``list`` / ``large_list`` values match the source element by element;
      all other columns match ``arr[col].to_pandas()``.
    """
    column_names = [
        "int",
        "int_with_nulls",
        "dict",
        "string",
        "date",
        "list",
        "large_list",
    ]
    # Build the table from the single list of names so columns and labels
    # cannot drift apart.
    table = pa.Table.from_arrays(
        [arr[name] for name in column_names], column_names
    )

    df = fr.pandas_from_arrow(table)

    fletcher_backed = ("int_with_nulls", "list", "large_list")
    list_like = ("list", "large_list")
    plain_pandas_types = (np.ndarray, pd.core.arrays.categorical.Categorical)

    for col in df.columns:
        expected_type = (
            fr.FletcherArray if col in fletcher_backed else plain_pandas_types
        )
        assert isinstance(df[col].values, expected_type)

        if col not in list_like:
            npt.assert_array_equal(df[col], arr[col].to_pandas())
            continue

        # List-typed columns are compared one element at a time.
        for position in range(len(arr[col])):
            assert np.all(df[col][position] == arr[col][position])
Beispiel #3
0
def test_read_parquet(tmpdir, continuous):
    """Round-trip a small table through Parquet via ``fr.read_parquet``.

    A three-column table (string, int32, bool, each with one null) is
    written with ``pq.write_table`` and read back with ``fr.read_parquet``.
    The result must equal ``fr.pandas_from_arrow`` applied to the same
    in-memory table with the same ``continuous`` flag.

    Parameters
    ----------
    tmpdir
        Temporary directory for this test (expected to be pytest's
        ``tmpdir`` fixture, i.e. a ``py.path.local``). The Parquet file is
        created inside it so nothing is left in the working directory and
        parametrized or parallel runs cannot clash on one shared path.
    continuous
        Forwarded unchanged to both ``fr.read_parquet`` and
        ``fr.pandas_from_arrow``.
    """
    str_arr = pa.array(["a", None, "c"], pa.string())
    int_arr = pa.array([1, None, -2], pa.int32())
    bool_arr = pa.array([True, None, False], pa.bool_())
    table = pa.Table.from_arrays([str_arr, int_arr, bool_arr],
                                 ["str", "int", "bool"])

    # Use a plain string path: it is accepted by every pyarrow version,
    # whereas path-like objects are not universally supported.
    parquet_path = str(tmpdir.join("df.parquet"))

    pq.write_table(table, parquet_path)
    result = fr.read_parquet(parquet_path, continuous=continuous)
    expected = fr.pandas_from_arrow(table, continuous=continuous)
    tm.assert_frame_equal(result, expected)
Beispiel #4
0
def test_pandas_from_arrow():
    """Check ``fr.pandas_from_arrow`` for chunked and continuous results.

    Called without ``continuous``, the result is expected to wrap
    ``fr.FletcherChunkedArray``; called with ``continuous=True`` it is
    expected to wrap ``fr.FletcherContinuousArray``. Both are verified for
    a bare arrow array (Series result) and for a record batch and a table
    (DataFrame result).
    """
    arr = pa.array(["a", "b", "c"], pa.string())

    # (expected backing array type, keyword arguments for the call)
    variants = [
        (fr.FletcherChunkedArray, {}),
        (fr.FletcherContinuousArray, {"continuous": True}),
    ]

    # Bare array -> Series.
    for array_type, kwargs in variants:
        wanted_series = pd.Series(array_type(arr))
        pdt.assert_series_equal(
            wanted_series, fr.pandas_from_arrow(arr, **kwargs)
        )

    # Record batch and table -> DataFrame with a single "column".
    rb = pa.RecordBatch.from_arrays([arr], ["column"])
    table = pa.Table.from_arrays([arr], ["column"])
    for array_type, kwargs in variants:
        wanted_frame = pd.DataFrame({"column": array_type(arr)})
        for source in (rb, table):
            pdt.assert_frame_equal(
                wanted_frame, fr.pandas_from_arrow(source, **kwargs)
            )