def test_simple_methods():
    """Exercise the basic accessors of ``PandasDataFrame``.

    Covers ``as_pandas``, ``native``, ``empty``, ``count``, ``is_local``,
    ``peek_array`` and ``peek_dict`` on an empty frame and on a two-row frame.
    """
    # Scenario 1: no rows, schema given explicitly.
    empty_df = PandasDataFrame([], "a:str,b:int")
    assert empty_df.as_pandas() is empty_df.native
    assert empty_df.empty
    assert 0 == empty_df.count()
    assert empty_df.is_local

    # Scenario 2: two rows; ``y`` is declared double while the raw inputs
    # are the int 1 and the string "2".
    filled_df = PandasDataFrame([["a", 1], ["b", "2"]], "x:str,y:double")
    assert filled_df.as_pandas() is filled_df.native
    assert not filled_df.empty
    assert 2 == filled_df.count()

    # Peeking is expected to yield the first row with ``y`` as a float.
    expected_first_row = ["a", 1.0]
    assert expected_first_row == filled_df.peek_array()
    assert {"x": "a", "y": 1.0} == filled_df.peek_dict()
def test_init():
    """Check the kinds of input ``PandasDataFrame`` accepts at construction.

    Scenarios, in order: schema only, a pandas frame built without column
    names, a pandas ``Series``, a pandas frame with named columns, another
    ``PandasDataFrame``, plain Python lists, and an unsupported object.
    """

    def native_rows(frame):
        """Return the rows of ``frame.native`` as nested Python lists."""
        return frame.native.values.tolist()

    # --- schema only -------------------------------------------------------
    schema_only = PandasDataFrame(schema="a:str,b:int")
    assert schema_only.is_bounded
    assert schema_only.count() == 0
    assert schema_only.schema == "a:str,b:int"
    assert Schema(schema_only.native) == "a:str,b:int"

    # --- pandas frame built without column names ---------------------------
    unnamed_pdf = pd.DataFrame([["a", 1], ["b", 2]])
    # Without a schema this construction is expected to be rejected.
    raises(FugueDataFrameInitError, lambda: PandasDataFrame(unnamed_pdf))
    # With a schema, the values are expected to follow the declared types.
    unnamed_cases = (
        ("a:str,b:str", [["a", "1"], ["b", "2"]]),
        ("a:str,b:int", [["a", 1], ["b", 2]]),
        ("a:str,b:double", [["a", 1.0], ["b", 2.0]]),
    )
    for schema_expr, expected_rows in unnamed_cases:
        converted = PandasDataFrame(unnamed_pdf, schema_expr)
        assert expected_rows == native_rows(converted)

    # --- pandas Series ------------------------------------------------------
    series = pd.DataFrame([["a", 1], ["b", 2]], columns=["a", "b"])["b"]
    assert isinstance(series, pd.Series)
    series_cases = (
        ("b:str", [["1"], ["2"]]),
        ("b:double", [[1.0], [2.0]]),
    )
    for schema_expr, expected_rows in series_cases:
        converted = PandasDataFrame(series, schema_expr)
        assert expected_rows == native_rows(converted)

    # --- pandas frame with named columns -----------------------------------
    named_pdf = pd.DataFrame([["a", 1], ["b", 2]], columns=["x", "y"])
    inferred = PandasDataFrame(named_pdf)
    assert inferred.schema == "x:str,y:long"
    # The schema lists the columns in a different order than the frame.
    reordered = PandasDataFrame(named_pdf, "y:str,x:str")
    assert [["1", "a"], ["2", "b"]] == native_rows(reordered)

    # --- wrapping an existing PandasDataFrame ------------------------------
    wrapped = PandasDataFrame(reordered)
    assert [["1", "a"], ["2", "b"]] == native_rows(wrapped)
    assert reordered.native is wrapped.native  # no real copy happened

    # --- plain Python lists -------------------------------------------------
    from_lists = PandasDataFrame([["a", 1], ["b", "2"]], "x:str,y:double")
    assert [["a", 1.0], ["b", 2.0]] == native_rows(from_lists)
    from_empty = PandasDataFrame([], "x:str,y:double")
    assert [] == native_rows(from_empty)

    # --- unsupported input --------------------------------------------------
    raises(FugueDataFrameInitError, lambda: PandasDataFrame(123))