def test_pandas_from_arrow():
    """Check ``fr.pandas_from_arrow`` for a bare array and a record batch.

    A ``pa.Array`` of strings is expected to come back as an unnamed
    ``pd.Series`` wrapping ``fr.FletcherArray``; a single-column
    ``pa.RecordBatch`` is expected to come back as a ``pd.DataFrame`` whose
    column is backed by ``fr.FletcherArray``.
    """
    strings = pa.array(["a", "b", "c"], pa.string())

    # Array input -> Series without a name.
    unnamed_series = pd.Series(fr.FletcherArray(strings))
    converted_series = fr.pandas_from_arrow(strings)
    pdt.assert_series_equal(unnamed_series, converted_series)

    # RecordBatch input -> DataFrame keyed by the batch's column name.
    batch = pa.RecordBatch.from_arrays([strings], ["column"])
    wanted_frame = pd.DataFrame({"column": fr.FletcherArray(strings)})
    converted_frame = fr.pandas_from_arrow(batch)
    pdt.assert_frame_equal(wanted_frame, converted_frame)
def test_pandas_from_arrow_casting_to_pandas(arr):
    """Check which columns ``fr.pandas_from_arrow`` hands over to pandas/NumPy.

    Builds a table from seven entries of ``arr`` and verifies that

    * ``int_with_nulls``, ``list`` and ``large_list`` are backed by
      ``fr.FletcherArray``;
    * every other column is a ``np.ndarray`` or a pandas ``Categorical``;
    * the values match the Arrow input (element-wise for the list columns,
      via ``to_pandas()`` for the rest).

    ``arr`` is indexed by column name below; presumably it is a fixture
    mapping names to Arrow arrays (its definition is not visible here).
    """
    column_names = [
        "int",
        "int_with_nulls",
        "dict",
        "string",
        "date",
        "list",
        "large_list",
    ]
    table = pa.Table.from_arrays(
        [arr[name] for name in column_names], column_names
    )
    frame = fr.pandas_from_arrow(table)

    fletcher_backed = ("int_with_nulls", "list", "large_list")
    list_like = ("list", "large_list")
    native_types = (np.ndarray, pd.core.arrays.categorical.Categorical)

    for name in frame.columns:
        values = frame[name].values
        if name not in fletcher_backed:
            assert isinstance(values, native_types)
        else:
            assert isinstance(values, fr.FletcherArray)

        if name not in list_like:
            npt.assert_array_equal(frame[name], arr[name].to_pandas())
            continue

        # List columns are compared row by row against the Arrow input.
        for idx in range(len(arr[name])):
            assert np.all(frame[name][idx] == arr[name][idx])
def test_read_parquet(tmpdir, continuous):
    """Check that ``fr.read_parquet`` matches converting the table directly.

    A three-column table (string, int32, bool; each with one null) is written
    to a Parquet file, read back with ``fr.read_parquet`` and compared with
    ``fr.pandas_from_arrow`` applied to the in-memory table.

    Parameters
    ----------
    tmpdir
        Temporary directory object providing ``join``; presumably pytest's
        ``tmpdir`` fixture. The Parquet file is created inside it.
    continuous
        Forwarded unchanged as the ``continuous`` keyword to both
        ``fr.read_parquet`` and ``fr.pandas_from_arrow``; presumably a
        parametrised fixture defined elsewhere.
    """
    str_arr = pa.array(["a", None, "c"], pa.string())
    int_arr = pa.array([1, None, -2], pa.int32())
    bool_arr = pa.array([True, None, False], pa.bool_())
    table = pa.Table.from_arrays(
        [str_arr, int_arr, bool_arr], ["str", "int", "bool"]
    )

    # Write into the per-test temporary directory rather than the current
    # working directory, so the test leaves no stray file behind and cannot
    # collide with other tests using the same file name.
    parquet_path = str(tmpdir.join("df.parquet"))
    pq.write_table(table, parquet_path)

    result = fr.read_parquet(parquet_path, continuous=continuous)
    expected = fr.pandas_from_arrow(table, continuous=continuous)
    tm.assert_frame_equal(result, expected)
def test_pandas_from_arrow():
    """Check ``fr.pandas_from_arrow`` in both chunked and continuous modes.

    With the default arguments the result is expected to be backed by
    ``fr.FletcherChunkedArray``; with ``continuous=True`` it is expected to be
    backed by ``fr.FletcherContinuousArray``. Both modes are exercised for a
    bare ``pa.Array`` (-> unnamed ``pd.Series``) and for a ``pa.RecordBatch``
    and a ``pa.Table`` (-> ``pd.DataFrame``).
    """
    strings = pa.array(["a", "b", "c"], pa.string())

    # (expected backing array type, extra keyword arguments for the call)
    variants = (
        (fr.FletcherChunkedArray, {}),
        (fr.FletcherContinuousArray, {"continuous": True}),
    )

    # Array input -> Series without a name.
    for array_cls, kwargs in variants:
        unnamed_series = pd.Series(array_cls(strings))
        pdt.assert_series_equal(
            unnamed_series, fr.pandas_from_arrow(strings, **kwargs)
        )

    # RecordBatch / Table input -> DataFrame keyed by the column name.
    batch = pa.RecordBatch.from_arrays([strings], ["column"])
    for array_cls, kwargs in variants:
        wanted_frame = pd.DataFrame({"column": array_cls(strings)})
        table = pa.Table.from_arrays([strings], ["column"])
        pdt.assert_frame_equal(
            wanted_frame, fr.pandas_from_arrow(batch, **kwargs)
        )
        pdt.assert_frame_equal(
            wanted_frame, fr.pandas_from_arrow(table, **kwargs)
        )