コード例 #1
0
ファイル: test_utils.py プロジェクト: zywillc/fugue
def test_df_eq():
    """Exercise ``df_eq`` with its metadata, precision, content and order switches."""
    wide_schema = "a:int,b:double,c:str"
    base = ArrayDataFrame([[0, 100.0, "a"]], wide_schema, {"a": 1})
    # Differs from ``base`` in the double column (by 0.001) and in metadata.
    near = ArrayDataFrame([[0, 100.001, "a"]], wide_schema, {"a": 2})

    assert df_eq(base, base)
    assert df_eq(base, near, digits=4, check_metadata=False)
    # Metadata differs, so turning the metadata check on must fail the comparison.
    assert not df_eq(base, near, digits=4, check_metadata=True)
    # At 6 digits the 0.001 gap is no longer tolerated.
    assert not df_eq(base, near, digits=6, check_metadata=False)
    # With content checking off, the value gap is ignored.
    assert df_eq(base, near, digits=6, check_metadata=False, check_content=False)
    # throw=True surfaces the mismatch as an AssertionError.
    raises(AssertionError, lambda: df_eq(base, near, throw=True))

    narrow_schema = "a:double,b:str"

    # A pandas frame plus explicit schema/metadata compares equal to its source.
    typed = ArrayDataFrame([[100.0, "a"]], narrow_schema, {"a": 1})
    assert df_eq(typed, typed.as_pandas(), typed.schema, typed.metadata)

    # A frame holding a null double equals itself.
    null_double = ArrayDataFrame([[None, "a"]], narrow_schema, {"a": 1})
    assert df_eq(null_double, null_double)

    # None and NaN in a double column compare as equal.
    none_double = ArrayDataFrame([[None, "a"]], narrow_schema, {"a": 1})
    nan_double = ArrayDataFrame([[np.nan, "a"]], narrow_schema, {"a": 1})
    assert df_eq(none_double, nan_double)

    # Null strings on both sides compare as equal.
    null_str_left = ArrayDataFrame([[100.0, None]], narrow_schema, {"a": 1})
    null_str_right = ArrayDataFrame([[100.0, None]], narrow_schema, {"a": 1})
    assert df_eq(null_str_left, null_str_right)

    # Row order is ignored unless check_order=True is requested.
    ascending = ArrayDataFrame([[0], [1]], "a:int")
    descending = ArrayDataFrame([[1], [0]], "a:int")
    assert df_eq(ascending, descending)
    assert not df_eq(ascending, descending, check_order=True)
コード例 #2
0
def test_nan_none():
    """Check how nulls come back from ``PandasDataFrame.as_array``."""
    # Default conversion: a null string stays None, a null double becomes NaN.
    source = ArrayDataFrame([[None, None]], "b:str,c:double")
    first_cell = source.as_pandas().iloc[0, 0]
    assert first_cell is None
    wrapped = PandasDataFrame(source.as_pandas(), source.schema)
    row = wrapped.as_array()[0]
    assert row[0] is None
    assert math.isnan(row[1])

    # type_safe=True: null int and null bool both come back as None.
    source = ArrayDataFrame([[None, None]], "b:int,c:bool")
    wrapped = PandasDataFrame(source.as_pandas(), source.schema)
    row = wrapped.as_array(type_safe=True)[0]
    assert row[0] is None
    assert row[1] is None

    # type_safe=True: in a frame that also has real values, the all-null
    # second row yields None for both the string and the double column.
    source = ArrayDataFrame([["a", 1.1], [None, None]], "b:str,c:double")
    wrapped = PandasDataFrame(source.as_pandas(), source.schema)
    row = wrapped.as_array(type_safe=True)[1]
    assert row[0] is None
    assert row[1] is None
コード例 #3
0
def test_nan_none():
    """Check how nulls come back from ``DaskDataFrame.as_array``."""
    # TODO: on dask, these tests can't pass
    # df = ArrayDataFrame([[None, None]], "b:str,c:double")
    # assert df.as_pandas().iloc[0, 0] is None
    # arr = DaskDataFrame(df.as_pandas(), df.schema).as_array()[0]
    # assert arr[0] is None
    # assert math.isnan(arr[1])

    # df = ArrayDataFrame([[None, None]], "b:int,c:bool")
    # arr = DaskDataFrame(df.as_pandas(), df.schema).as_array(type_safe=True)[0]
    # assert np.isnan(arr[0])  # TODO: this will cause inconsistent behavior cross engine
    # assert np.isnan(arr[1])  # TODO: this will cause inconsistent behavior cross engine

    source = ArrayDataFrame([["a", 1.1], [None, None]], "b:str,c:double")

    # Built from a pandas frame: null string -> None, null double -> NaN.
    from_pandas = DaskDataFrame(source.as_pandas(), source.schema)
    null_row = from_pandas.as_array()[1]
    assert null_row[0] is None
    assert math.isnan(null_row[1])

    # Built from a plain array: same expectations.
    from_array = DaskDataFrame(source.as_array(), source.schema)
    null_row = from_array.as_array()[1]
    assert null_row[0] is None
    assert math.isnan(null_row[1])

    # Built from a single pandas column: the null string is still None.
    from_column = DaskDataFrame(source.as_pandas()["b"], "b:str")
    null_row = from_column.as_array()[1]
    assert null_row[0] is None
コード例 #4
0
ファイル: test_utils.py プロジェクト: zywillc/fugue
def test_to_local_df():
    """Check ``to_local_df`` on local frames, raw native data and bad input."""
    schema = "a:int,b:int"
    array_df = ArrayDataFrame([[0, 1]], schema)
    pandas_df = PandasDataFrame(array_df.as_pandas(), schema)
    iterable_df = IterableDataFrame([[0, 1]], schema)

    # Frames that are already local are handed back as the same object.
    for local in (array_df, pandas_df, iterable_df):
        assert to_local_df(local) is local

    # Native data plus a schema is wrapped in the matching local frame type.
    expectations = (
        (array_df, ArrayDataFrame),
        (pandas_df, PandasDataFrame),
        (iterable_df, IterableDataFrame),
    )
    for frame, expected_type in expectations:
        assert isinstance(to_local_df(frame.native, schema), expected_type)

    # Unsupported input types are rejected.
    raises(TypeError, lambda: to_local_df(123))

    # Metadata supplied with native data is carried onto the result.
    metadata = {"a": 1}
    converted = to_local_df(array_df.native, array_df.schema, metadata)
    assert converted.metadata == metadata

    # None is rejected, and so is passing a schema with an existing frame.
    raises(NoneArgumentError, lambda: to_local_df(None))
    raises(ValueError, lambda: to_local_df(array_df, "a:int,b:int", None))