Beispiel #1
0
def test_simple_methods():
    """Exercise the basic accessors of ``PandasDataFrame``.

    Two frames are checked: one built from an empty row list plus a schema,
    and one built from two rows whose ``y`` column is declared ``double``.
    """
    # Empty row list: only the schema describes the frame.
    empty_frame = PandasDataFrame([], "a:str,b:int")
    assert empty_frame.as_pandas() is empty_frame.native
    assert empty_frame.empty
    assert empty_frame.count() == 0
    assert empty_frame.is_local

    # Two rows; the second row supplies y as the string "2" while the
    # schema declares y as double.
    filled_frame = PandasDataFrame([["a", 1], ["b", "2"]], "x:str,y:double")
    assert filled_frame.as_pandas() is filled_frame.native
    assert not filled_frame.empty
    assert filled_frame.count() == 2
    # Peeking looks at the first row only, as a list or keyed by column name.
    assert filled_frame.peek_array() == ["a", 1.0]
    assert filled_frame.peek_dict() == {"x": "a", "y": 1.0}
Beispiel #2
0
def test_init():
    """Check the input kinds accepted by the ``PandasDataFrame`` constructor.

    Scenarios covered, in order: schema only, a pandas DataFrame without
    column names, a pandas Series, a pandas DataFrame with column names,
    another ``PandasDataFrame``, a plain list of rows, an empty list, and an
    unsupported object.
    """

    def native_rows(frame):
        # Rows of the wrapped pandas object as nested Python lists.
        return frame.native.values.tolist()

    # --- schema only -------------------------------------------------------
    schema_only = PandasDataFrame(schema="a:str,b:int")
    assert schema_only.is_bounded
    assert schema_only.count() == 0
    assert schema_only.schema == "a:str,b:int"
    assert Schema(schema_only.native) == "a:str,b:int"

    # --- pandas DataFrame created without column names ---------------------
    unnamed_pdf = pd.DataFrame([["a", 1], ["b", 2]])
    # Without an explicit schema this input is expected to be rejected.
    raises(FugueDataFrameInitError, lambda: PandasDataFrame(unnamed_pdf))
    as_str = PandasDataFrame(unnamed_pdf, "a:str,b:str")
    assert native_rows(as_str) == [["a", "1"], ["b", "2"]]
    as_int = PandasDataFrame(unnamed_pdf, "a:str,b:int")
    assert native_rows(as_int) == [["a", 1], ["b", 2]]
    as_double = PandasDataFrame(unnamed_pdf, "a:str,b:double")
    assert native_rows(as_double) == [["a", 1.0], ["b", 2.0]]

    # --- pandas Series -----------------------------------------------------
    two_columns = pd.DataFrame([["a", 1], ["b", 2]], columns=["a", "b"])
    series_b = two_columns["b"]
    assert isinstance(series_b, pd.Series)
    series_as_str = PandasDataFrame(series_b, "b:str")
    assert native_rows(series_as_str) == [["1"], ["2"]]
    series_as_double = PandasDataFrame(series_b, "b:double")
    assert native_rows(series_as_double) == [[1.0], [2.0]]

    # --- pandas DataFrame with column names --------------------------------
    named_pdf = pd.DataFrame([["a", 1], ["b", 2]], columns=["x", "y"])
    inferred = PandasDataFrame(named_pdf)
    assert inferred.schema == "x:str,y:long"
    # The schema lists y before x, and the expected rows follow that order.
    reordered = PandasDataFrame(named_pdf, "y:str,x:str")
    assert native_rows(reordered) == [["1", "a"], ["2", "b"]]

    # --- wrapping an existing PandasDataFrame ------------------------------
    rewrapped = PandasDataFrame(reordered)
    assert native_rows(rewrapped) == [["1", "a"], ["2", "b"]]
    assert reordered.native is rewrapped.native  # no real copy happened

    # --- plain Python lists ------------------------------------------------
    from_rows = PandasDataFrame([["a", 1], ["b", "2"]], "x:str,y:double")
    assert native_rows(from_rows) == [["a", 1.0], ["b", 2.0]]

    from_empty = PandasDataFrame([], "x:str,y:double")
    assert native_rows(from_empty) == []

    # --- unsupported input -------------------------------------------------
    raises(FugueDataFrameInitError, lambda: PandasDataFrame(123))