def test_check_meta_typename():
    """Check that a dask-vs-pandas type mismatch names both libraries.

    A pandas frame checked against itself must not raise.  Checking a dask
    DataFrame against pandas metadata must raise, and the error text has to
    contain both "dask" and "pandas".
    """
    pdf = pd.DataFrame({'x': []})
    dask_frame = dd.from_pandas(pdf, npartitions=1)

    # Baseline: identical pandas object and metadata pass the check.
    check_meta(pdf, pdf)

    with pytest.raises(Exception) as excinfo:
        check_meta(dask_frame, pdf)

    # Render the message once and look for both library names in it.
    message = str(excinfo.value)
    assert "dask" in message
    assert "pandas" in message
def test_check_meta():
    """Exercise ``check_meta`` on matching and mismatching pandas metadata.

    Covers four things:

    * a DataFrame / Series whose metadata matches is returned unchanged
      (the very same object);
    * with ``numeric_equal=True`` an int64 or uint64 Series is accepted
      against float64 / int64 metadata;
    * with ``numeric_equal=False`` the int64-vs-float64 Series raises
      ``ValueError`` with an exact, aligned two-column table;
    * a DataFrame mismatch raises ``ValueError`` whose table lists only the
      columns 'a', 'c' and 'e' (column 'd', int64 data against float64
      metadata, is not expected in the report).

    The expected messages use fully-qualified partition type names and
    quoted column names.  Every table row is padded to the width fixed by
    the ``+----+`` border rows.
    """
    df = pd.DataFrame(
        {
            "a": ["x", "y", "z"],
            "b": [True, False, True],
            "c": [1, 2.5, 3.5],
            "d": [1, 2, 3],
            "e": pd.Categorical(["x", "y", "z"]),
            "f": pd.Series([1, 2, 3], dtype=np.uint64),
        }
    )
    # Zero-row slice: same columns and dtypes, no data.
    meta = df.iloc[:0]

    # DataFrame metadata passthrough if correct
    assert check_meta(df, meta) is df

    # Series metadata passthrough if correct
    e = df.e
    assert check_meta(e, meta.e) is e

    # numeric_equal means floats and ints are equivalent
    d = df.d
    f = df.f
    assert check_meta(d, meta.d.astype("f8"), numeric_equal=True) is d
    assert check_meta(f, meta.f.astype("f8"), numeric_equal=True) is f
    assert check_meta(f, meta.f.astype("i8"), numeric_equal=True) is f

    # Series metadata error
    with pytest.raises(ValueError) as err:
        check_meta(d, meta.d.astype("f8"), numeric_equal=False)
    assert str(err.value) == (
        "Metadata mismatch found.\n"
        "\n"
        "Partition type: `pandas.core.series.Series`\n"
        "+----------+---------+\n"
        "|          | dtype   |\n"
        "+----------+---------+\n"
        "| Found    | int64   |\n"
        "| Expected | float64 |\n"
        "+----------+---------+"
    )

    # DataFrame metadata error
    meta2 = meta.astype({"a": "category", "d": "f8"})[["a", "b", "c", "d"]]
    df2 = df[["a", "b", "d", "e"]]
    with pytest.raises(ValueError) as err:
        check_meta(df2, meta2, funcname="from_delayed")

    exp = (
        "Metadata mismatch found in `from_delayed`.\n"
        "\n"
        "Partition type: `pandas.core.frame.DataFrame`\n"
        "+--------+----------+----------+\n"
        "| Column | Found    | Expected |\n"
        "+--------+----------+----------+\n"
        "| 'a'    | object   | category |\n"
        "| 'c'    | -        | float64  |\n"
        "| 'e'    | category | -        |\n"
        "+--------+----------+----------+"
    )
    assert str(err.value) == exp
def test_check_meta():
    """Exercise ``check_meta`` on matching and mismatching pandas metadata.

    Matching DataFrame / Series metadata must hand back the very same
    object.  ``numeric_equal=True`` must accept int64 / uint64 data against
    float64 / int64 metadata, while ``numeric_equal=False`` must raise
    ``ValueError`` with an exact, aligned table.  The DataFrame mismatch
    report is expected to list only columns a, c and e.

    The expected messages use fully-qualified partition type names and
    unquoted column names.  Every table row is padded to the width fixed by
    the ``+----+`` border rows.
    """
    df = pd.DataFrame({
        'a': ['x', 'y', 'z'],
        'b': [True, False, True],
        'c': [1, 2.5, 3.5],
        'd': [1, 2, 3],
        'e': pd.Categorical(['x', 'y', 'z']),
        'f': pd.Series([1, 2, 3], dtype=np.uint64)
    })
    # Zero-row slice: same columns and dtypes, no data.
    meta = df.iloc[:0]

    # DataFrame metadata passthrough if correct
    assert check_meta(df, meta) is df

    # Series metadata passthrough if correct
    e = df.e
    assert check_meta(e, meta.e) is e

    # numeric_equal means floats and ints are equivalent
    d = df.d
    f = df.f
    assert check_meta(d, meta.d.astype('f8'), numeric_equal=True) is d
    assert check_meta(f, meta.f.astype('f8'), numeric_equal=True) is f
    assert check_meta(f, meta.f.astype('i8'), numeric_equal=True) is f

    # Series metadata error
    with pytest.raises(ValueError) as err:
        check_meta(d, meta.d.astype('f8'), numeric_equal=False)
    assert str(err.value) == ('Metadata mismatch found.\n'
                              '\n'
                              'Partition type: `pandas.core.series.Series`\n'
                              '+----------+---------+\n'
                              '|          | dtype   |\n'
                              '+----------+---------+\n'
                              '| Found    | int64   |\n'
                              '| Expected | float64 |\n'
                              '+----------+---------+')

    # DataFrame metadata error
    meta2 = meta.astype({'a': 'category', 'd': 'f8'})[['a', 'b', 'c', 'd']]
    df2 = df[['a', 'b', 'd', 'e']]
    with pytest.raises(ValueError) as err:
        check_meta(df2, meta2, funcname='from_delayed')

    exp = ('Metadata mismatch found in `from_delayed`.\n'
           '\n'
           'Partition type: `pandas.core.frame.DataFrame`\n'
           '+--------+----------+----------+\n'
           '| Column | Found    | Expected |\n'
           '+--------+----------+----------+\n'
           '| a      | object   | category |\n'
           '| c      | -        | float64  |\n'
           '| e      | category | -        |\n'
           '+--------+----------+----------+')
    assert str(err.value) == exp
def test_check_meta():
    """Exercise ``check_meta`` on matching and mismatching pandas metadata.

    Matching DataFrame / Series metadata must hand back the very same
    object.  ``numeric_equal=True`` must accept int64 / uint64 data against
    float64 / int64 metadata, while ``numeric_equal=False`` must raise
    ``ValueError`` with an exact, aligned table.  The DataFrame mismatch
    report is expected to list only columns a, c and e.

    The expected messages use the short partition type names (``Series``,
    ``DataFrame``) and unquoted column names.  Every table row is padded to
    the width fixed by the ``+----+`` border rows.
    """
    df = pd.DataFrame({'a': ['x', 'y', 'z'],
                       'b': [True, False, True],
                       'c': [1, 2.5, 3.5],
                       'd': [1, 2, 3],
                       'e': pd.Categorical(['x', 'y', 'z']),
                       'f': pd.Series([1, 2, 3], dtype=np.uint64)})
    # Zero-row slice: same columns and dtypes, no data.
    meta = df.iloc[:0]

    # DataFrame metadata passthrough if correct
    assert check_meta(df, meta) is df

    # Series metadata passthrough if correct
    e = df.e
    assert check_meta(e, meta.e) is e

    # numeric_equal means floats and ints are equivalent
    d = df.d
    f = df.f
    assert check_meta(d, meta.d.astype('f8'), numeric_equal=True) is d
    assert check_meta(f, meta.f.astype('f8'), numeric_equal=True) is f
    assert check_meta(f, meta.f.astype('i8'), numeric_equal=True) is f

    # Series metadata error
    with pytest.raises(ValueError) as err:
        check_meta(d, meta.d.astype('f8'), numeric_equal=False)
    assert str(err.value) == ('Metadata mismatch found.\n'
                              '\n'
                              'Partition type: `Series`\n'
                              '+----------+---------+\n'
                              '|          | dtype   |\n'
                              '+----------+---------+\n'
                              '| Found    | int64   |\n'
                              '| Expected | float64 |\n'
                              '+----------+---------+')

    # DataFrame metadata error
    meta2 = meta.astype({'a': 'category', 'd': 'f8'})[['a', 'b', 'c', 'd']]
    df2 = df[['a', 'b', 'd', 'e']]
    with pytest.raises(ValueError) as err:
        check_meta(df2, meta2, funcname='from_delayed')

    exp = (
        'Metadata mismatch found in `from_delayed`.\n'
        '\n'
        'Partition type: `DataFrame`\n'
        '+--------+----------+----------+\n'
        '| Column | Found    | Expected |\n'
        '+--------+----------+----------+\n'
        '| a      | object   | category |\n'
        '| c      | -        | float64  |\n'
        '| e      | category | -        |\n'
        '+--------+----------+----------+')
    assert str(err.value) == exp
def test_check_meta():
    """Exercise ``check_meta`` on matching and mismatching pandas metadata.

    Matching DataFrame / Series metadata must hand back the very same
    object.  ``numeric_equal=True`` must accept the int64 Series against
    float64 metadata, while ``numeric_equal=False`` must raise
    ``ValueError`` with an exact, aligned table.  The DataFrame mismatch
    report is expected to list only columns a, c and e.

    How the categorical dtype is spelled in the DataFrame report depends on
    ``PANDAS_VERSION``: from 0.21.0 on the full ``CategoricalDtype(...)``
    text is expected, before that the plain word ``category``.  Only those
    two cell texts differ between the versions, so the table is assembled
    once from its cells; each column is as wide as its widest cell.
    """
    df = pd.DataFrame({
        'a': ['x', 'y', 'z'],
        'b': [True, False, True],
        'c': [1, 2.5, 3.5],
        'd': [1, 2, 3],
        'e': pd.Categorical(['x', 'y', 'z'])
    })
    # Zero-row slice: same columns and dtypes, no data.
    meta = df.iloc[:0]

    # DataFrame metadata passthrough if correct
    assert check_meta(df, meta) is df

    # Series metadata passthrough if correct
    e = df.e
    assert check_meta(e, meta.e) is e

    # numeric_equal means floats and ints are equivalent
    d = df.d
    assert check_meta(d, meta.d.astype('f8'), numeric_equal=True) is d

    # Series metadata error
    with pytest.raises(ValueError) as err:
        check_meta(d, meta.d.astype('f8'), numeric_equal=False)
    assert str(err.value) == ('Metadata mismatch found.\n'
                              '\n'
                              'Partition type: `Series`\n'
                              '+----------+---------+\n'
                              '|          | dtype   |\n'
                              '+----------+---------+\n'
                              '| Found    | int64   |\n'
                              '| Expected | float64 |\n'
                              '+----------+---------+')

    # DataFrame metadata error
    meta2 = meta.astype({'a': 'category', 'd': 'f8'})[['a', 'b', 'c', 'd']]
    df2 = df[['a', 'b', 'd', 'e']]
    with pytest.raises(ValueError) as err:
        check_meta(df2, meta2, funcname='from_delayed')

    # Only the spelling of the categorical dtypes is version dependent.
    if PANDAS_VERSION >= '0.21.0':
        found_e = "CategoricalDtype(categories=['x', 'y', 'z'], ordered=False)"
        expected_a = 'CategoricalDtype(categories=[], ordered=False)'
    else:
        found_e = expected_a = 'category'

    cells = [
        ('Column', 'Found', 'Expected'),
        ('a', 'object', expected_a),
        ('c', '-', 'float64'),
        ('e', found_e, '-'),
    ]
    # Each column is as wide as its widest cell; borders add one space of
    # padding on either side of the cell text.
    widths = [max(len(cell) for cell in column) for column in zip(*cells)]
    rule = '+' + '+'.join('-' * (width + 2) for width in widths) + '+'
    rendered = [
        '| ' + ' | '.join(cell.ljust(width)
                          for cell, width in zip(row, widths)) + ' |'
        for row in cells
    ]
    exp = '\n'.join(
        ['Metadata mismatch found in `from_delayed`.',
         '',
         'Partition type: `DataFrame`',
         rule,
         rendered[0],
         rule]
        + rendered[1:]
        + [rule]
    )
    assert str(err.value) == exp