Ejemplo n.º 1
0
 def assert_eq(df, df_expected=None, raw=False):
     """Check that ``df`` survives a serialize/deserialize round trip.

     ``df`` is passed through ``serialize_df`` and then ``deserialize_df``;
     the result is compared against ``df_expected``, or against ``df``
     itself when ``df_expected`` is omitted.

     With ``raw=True`` the ``.native`` attributes of the two frames are
     compared with ``==``; otherwise the comparison is delegated to
     ``df_eq(..., throw=True)``.
     """
     expected = df if df_expected is None else df_expected
     round_tripped = deserialize_df(serialize_df(df))
     if not raw:
         df_eq(expected, round_tripped, throw=True)
         return
     assert expected.native == round_tripped.native
Ejemplo n.º 2
0
def test_serialize_df(tmpdir):
    """Exercise ``serialize_df`` / ``deserialize_df`` round trips.

    Covers: ``None`` input, several dataframe types and schemas, the
    file-based form (extra positional arguments ``0``, a path and optionally
    a ``FileSystem``), and the two error cases asserted below.
    """

    def assert_eq(df, df_expected=None, raw=False):
        """Round-trip ``df`` and compare it with ``df_expected`` (default ``df``).

        ``raw=True`` compares the ``.native`` attributes directly; otherwise
        ``df_eq(..., throw=True)`` performs the comparison.
        """
        expected = df if df_expected is None else df_expected
        restored = deserialize_df(serialize_df(df))
        if not raw:
            df_eq(expected, restored, throw=True)
            return
        assert expected.native == restored.native

    fs = FileSystem()

    # ``None`` must come back as ``None``.
    assert deserialize_df(serialize_df(None)) is None

    # Flat schemas on ArrayDataFrame: empty frame, all-null row, mixed row.
    for rows, schema in (
        ([], "a:int,b:int"),
        ([[None, None]], "a:int,b:int"),
        ([[None, "abc"]], "a:int,b:str"),
    ):
        assert_eq(ArrayDataFrame(rows, schema))

    # Nested list/dict columns are compared through ``.native``.
    nested_schema = "a:int,b:[int],c:{x:int}"
    assert_eq(
        ArrayDataFrame([[None, [1, 2], {"x": 1}]], nested_schema),
        raw=True,
    )
    assert_eq(
        IterableDataFrame([[None, [1, 2], {"x": 1}]], nested_schema),
        df_expected=ArrayDataFrame([[None, [1, 2], {"x": 1}]], nested_schema),
        raw=True,
    )

    # Same flat cases on PandasDataFrame.
    for rows, schema in (
        ([[None, None]], "a:int,b:int"),
        ([[None, "abc"]], "a:int,b:str"),
    ):
        assert_eq(PandasDataFrame(rows, schema))

    # Second positional argument 0 with no path is expected to be rejected.
    def serialize_with_zero_and_no_path():
        return serialize_df(ArrayDataFrame([], "a:int,b:int"), 0)

    raises(InvalidOperationError, serialize_with_zero_and_no_path)

    pkl_path = os.path.join(tmpdir, "1.pkl")
    frame = ArrayDataFrame([[None, None]], "a:int,b:int")

    # Serialize with an explicit FileSystem, then read back with and without it.
    blob = serialize_df(frame, 0, pkl_path, fs)
    with_fs = deserialize_df(blob, fs)
    df_eq(frame, with_fs, throw=True)
    without_fs = deserialize_df(blob)
    df_eq(frame, without_fs, throw=True)

    # Serialize without passing a FileSystem at all.
    blob = serialize_df(frame, 0, pkl_path)
    reloaded = deserialize_df(blob)
    df_eq(frame, reloaded, throw=True)

    # A JSON string that is not a serialized dataframe must raise ValueError.
    bad_payload = '{"x":1}'
    raises(ValueError, lambda: deserialize_df(bad_payload))
Ejemplo n.º 3
0
 def _get_dfs(self, row: Any) -> Iterable[Any]:
     """Yield one dataframe per entry of ``self.df_idx``, read from ``row``.

     Each entry of ``self.df_idx`` unpacks to ``(key, name, v)``. When
     ``row[key]`` is ``None`` an empty ``ArrayDataFrame([], v)`` is used
     (``v`` is presumably the schema -- confirm against where ``df_idx`` is
     built); otherwise ``row[key]`` is passed to ``deserialize_df`` and the
     result is asserted to be non-``None``.

     Yields ``(name, df)`` tuples when ``self.named`` is truthy, bare
     dataframes otherwise.
     """
     for key, df_name, empty_spec in self.df_idx:
         if row[key] is not None:
             frame: DataFrame = deserialize_df(row[key])  # type: ignore
             assert frame is not None
         else:
             # Missing payload: substitute an empty frame.
             frame = ArrayDataFrame([], empty_spec)
         yield (df_name, frame) if self.named else frame