def to_local_df(df: Any, schema: Any = None, metadata: Any = None) -> LocalDataFrame:
    """Turn ``df`` into a :class:`~fugue.dataframe.dataframe.LocalDataFrame`

    :param df: a :class:`~fugue.dataframe.dataframe.DataFrame`, a pandas
      DataFrame, a list, or any other iterable of arrays
    :param schema: |SchemaLikeObject|, defaults to None. It must stay None
      when ``df`` is a :class:`~fugue.dataframe.dataframe.DataFrame`
    :param metadata: dict-like object with string keys, defaults to None.
      It must stay None when ``df`` is a
      :class:`~fugue.dataframe.dataframe.DataFrame`
    :raises ValueError: if ``df`` is a
      :class:`~fugue.dataframe.dataframe.DataFrame` and ``schema`` or
      ``metadata`` is provided
    :raises TypeError: if ``df`` is none of the supported input types
    :return: ``df.as_local()`` for a
      :class:`~fugue.dataframe.dataframe.DataFrame` input, otherwise a new
      local dataframe wrapping ``df``

    :Examples:

    >>> a = to_local_df([[0,'a'],[1,'b']],"a:int,b:str")
    >>> assert to_local_df(a) is a
    >>> to_local_df(SparkDataFrame([[0,'a'],[1,'b']],"a:int,b:str"))
    """
    assert_arg_not_none(df, "df")

    if isinstance(df, DataFrame):
        # An existing dataframe already carries its own schema and metadata
        nothing_overridden = schema is None and metadata is None
        aot(
            nothing_overridden,
            ValueError("schema and metadata must be None when df is a DataFrame"),
        )
        return df.as_local()

    # Order matters: a list is also an iterable, so the more specific
    # native types have to be tested before the generic Iterable fallback
    native_wrappers = (
        (pd.DataFrame, PandasDataFrame),
        (List, ArrayDataFrame),
        (Iterable, IterableDataFrame),
    )
    for native_type, wrapper in native_wrappers:
        if isinstance(df, native_type):
            return wrapper(df, schema, metadata)

    raise TypeError(f"{df} cannot convert to a LocalDataFrame")
def test_to_local_bounded_df():
    """Check ``to_local_bounded_df`` on an array and an iterable dataframe."""
    schema = "a:int,b:int"

    # An ArrayDataFrame comes back as the very same object
    array_df = ArrayDataFrame([[0, 1]], schema)
    assert to_local_bounded_df(array_df) is array_df

    # An IterableDataFrame is converted into a different object that keeps
    # the rows, the schema and the metadata
    iter_df = IterableDataFrame([[0, 1]], schema, {"a": 1})
    bounded = to_local_bounded_df(iter_df)
    assert bounded is not iter_df
    assert bounded.as_array() == [[0, 1]]
    assert bounded.schema == schema
    assert bounded.metadata == {"a": 1}
def test_to_local_df():
    """Check ``to_local_df`` pass-through, wrapping and error cases."""
    schema = "a:int,b:int"
    array_df = ArrayDataFrame([[0, 1]], schema)
    pandas_df = PandasDataFrame(array_df.as_pandas(), schema)
    iter_df = IterableDataFrame([[0, 1]], schema)
    local_frames = (array_df, pandas_df, iter_df)

    # Local dataframes are returned as the same object
    for frame in local_frames:
        assert to_local_df(frame) is frame

    # The native object of each frame is wrapped back into the same class
    expected_types = (ArrayDataFrame, PandasDataFrame, IterableDataFrame)
    for frame, expected_type in zip(local_frames, expected_types):
        assert isinstance(to_local_df(frame.native, schema), expected_type)

    # Unsupported input type
    raises(TypeError, lambda: to_local_df(123))

    # Metadata is carried over to the converted dataframe
    meta = {"a": 1}
    converted = to_local_df(array_df.native, array_df.schema, meta)
    assert converted.metadata == meta

    # None input, and schema given together with an existing dataframe
    raises(NoneArgumentError, lambda: to_local_df(None))
    raises(ValueError, lambda: to_local_df(array_df, schema, None))
def test_serialize_df(tmpdir):
    """Round-trip dataframes through ``serialize_df`` / ``deserialize_df``."""

    def check_roundtrip(frame, expected=None, raw=False):
        # Serialize then deserialize ``frame`` and compare with ``expected``
        # (``frame`` itself when ``expected`` is not given)
        target = frame if expected is None else expected
        restored = deserialize_df(serialize_df(frame))
        if raw:
            # Compare the underlying native objects directly
            assert target.native == restored.native
            return
        df_eq(target, restored, throw=True)

    def nested_rows():
        # Build a fresh row list per dataframe so no data is shared
        return [[None, [1, 2], {"x": 1}]]

    flat_cases = (
        ([], "a:int,b:int"),
        ([[None, None]], "a:int,b:int"),
        ([[None, "abc"]], "a:int,b:str"),
    )
    nested_schema = "a:int,b:[int],c:{x:int}"

    assert deserialize_df(serialize_df(None)) is None

    for rows, schema in flat_cases:
        check_roundtrip(ArrayDataFrame(rows, schema))

    check_roundtrip(ArrayDataFrame(nested_rows(), nested_schema), raw=True)
    check_roundtrip(
        IterableDataFrame(nested_rows(), nested_schema),
        ArrayDataFrame(nested_rows(), nested_schema),
        raw=True,
    )

    # The empty-data case is only exercised for ArrayDataFrame
    for rows, schema in flat_cases[1:]:
        check_roundtrip(PandasDataFrame(rows, schema))

    # A threshold of 0 without a file path is rejected
    raises(
        InvalidOperationError,
        lambda: serialize_df(ArrayDataFrame([], "a:int,b:int"), 0),
    )

    # File based serialization, with and without an explicit file system
    file_system = FileSystem()
    pkl_path = os.path.join(tmpdir, "1.pkl")
    source = ArrayDataFrame([[None, None]], "a:int,b:int")

    blob = serialize_df(source, 0, pkl_path, file_system)
    for load_args in ((blob, file_system), (blob,)):
        df_eq(source, deserialize_df(*load_args), throw=True)

    blob = serialize_df(source, 0, pkl_path)
    df_eq(source, deserialize_df(blob), throw=True)

    # A JSON payload of an unexpected shape is rejected
    raises(ValueError, lambda: deserialize_df('{"x":1}'))
def test_pickle_df():
    """Round-trip dataframes through ``pickle_df`` / ``unpickle_df``."""

    def check_roundtrip(frame, expected=None, raw=False):
        # Pickle then unpickle ``frame`` and compare with ``expected``
        # (``frame`` itself when ``expected`` is not given)
        target = frame if expected is None else expected
        restored = unpickle_df(pickle_df(frame))
        if raw:
            # Compare the underlying native objects directly
            assert target.native == restored.native
            return
        df_eq(target, restored, throw=True)

    def nested_rows():
        # Build a fresh row list per dataframe so no data is shared
        return [[None, [1, 2], {"x": 1}]]

    flat_cases = (
        ([], "a:int,b:int"),
        ([[None, None]], "a:int,b:int"),
        ([[None, "abc"]], "a:int,b:str"),
    )
    nested_schema = "a:int,b:[int],c:{x:int}"

    for rows, schema in flat_cases:
        check_roundtrip(ArrayDataFrame(rows, schema))

    check_roundtrip(ArrayDataFrame(nested_rows(), nested_schema), raw=True)
    check_roundtrip(
        IterableDataFrame(nested_rows(), nested_schema),
        ArrayDataFrame(nested_rows(), nested_schema),
        raw=True,
    )

    # The empty-data case is only exercised for ArrayDataFrame
    for rows, schema in flat_cases[1:]:
        check_roundtrip(PandasDataFrame(rows, schema))