Esempio n. 1
0
def test_check_meta_typename():
    """The error for a dask partition must mention both dask and pandas.

    A pandas frame checked against itself is accepted silently; handing a
    dask DataFrame in as the partition must raise, and the message has to
    contain both library names.
    """
    empty_frame = pd.DataFrame({'x': []})
    lazy_frame = dd.from_pandas(empty_frame, npartitions=1)

    # Sanity check: identical pandas partition and metadata do not raise.
    check_meta(empty_frame, empty_frame)

    with pytest.raises(Exception) as excinfo:
        check_meta(lazy_frame, empty_frame)

    message = str(excinfo.value)
    assert "dask" in message
    assert "pandas" in message
Esempio n. 2
0
def test_check_meta():
    """Exercise check_meta passthrough and its mismatch error reports.

    Partitions that agree with their metadata must come back as the very
    same object. A dtype mismatch on a Series and a column/dtype mismatch
    on a DataFrame must raise ValueError with the exact tabular messages
    asserted below.
    """
    columns = {
        "a": ["x", "y", "z"],
        "b": [True, False, True],
        "c": [1, 2.5, 3.5],
        "d": [1, 2, 3],
        "e": pd.Categorical(["x", "y", "z"]),
        "f": pd.Series([1, 2, 3], dtype=np.uint64),
    }
    frame = pd.DataFrame(columns)
    # Zero-row slice of the frame, used as the metadata.
    empty = frame.iloc[:0]

    # A DataFrame matching its metadata is returned untouched.
    assert check_meta(frame, empty) is frame

    # Likewise for a Series (the categorical column).
    cat_col = frame["e"]
    assert check_meta(cat_col, empty["e"]) is cat_col

    # With numeric_equal=True the int64/uint64 columns are accepted against
    # float or int metadata.
    int_col = frame["d"]
    uint_col = frame["f"]
    for part, meta_name, dtype in (
        (int_col, "d", "f8"),
        (uint_col, "f", "f8"),
        (uint_col, "f", "i8"),
    ):
        loosened = empty[meta_name].astype(dtype)
        assert check_meta(part, loosened, numeric_equal=True) is part

    # Series mismatch: with numeric_equal=False, int64 vs float64 is an error.
    series_msg = (
        "Metadata mismatch found.\n"
        "\n"
        "Partition type: `pandas.core.series.Series`\n"
        "+----------+---------+\n"
        "|          | dtype   |\n"
        "+----------+---------+\n"
        "| Found    | int64   |\n"
        "| Expected | float64 |\n"
        "+----------+---------+"
    )
    with pytest.raises(ValueError) as series_err:
        check_meta(int_col, empty["d"].astype("f8"), numeric_equal=False)
    assert str(series_err.value) == series_msg

    # DataFrame mismatch: 'a' has a different dtype, 'c' exists only in the
    # metadata and 'e' only in the partition.
    recast = empty.astype({"a": "category", "d": "f8"})
    wrong_meta = recast[["a", "b", "c", "d"]]
    partition = frame[["a", "b", "d", "e"]]
    frame_msg = (
        "Metadata mismatch found in `from_delayed`.\n"
        "\n"
        "Partition type: `pandas.core.frame.DataFrame`\n"
        "+--------+----------+----------+\n"
        "| Column | Found    | Expected |\n"
        "+--------+----------+----------+\n"
        "| 'a'    | object   | category |\n"
        "| 'c'    | -        | float64  |\n"
        "| 'e'    | category | -        |\n"
        "+--------+----------+----------+"
    )
    with pytest.raises(ValueError) as frame_err:
        check_meta(partition, wrong_meta, funcname="from_delayed")
    assert str(frame_err.value) == frame_msg
Esempio n. 3
0
def test_check_meta():
    """Check that check_meta passes good partitions through and reports bad ones.

    Matching DataFrame/Series partitions must be returned by identity.
    Mismatches must raise ValueError carrying the exact table-formatted
    message compared against below.
    """
    frame = pd.DataFrame({
        'a': ['x', 'y', 'z'],
        'b': [True, False, True],
        'c': [1, 2.5, 3.5],
        'd': [1, 2, 3],
        'e': pd.Categorical(['x', 'y', 'z']),
        'f': pd.Series([1, 2, 3], dtype=np.uint64),
    })
    # Empty (zero-row) view of the frame acts as the metadata.
    empty = frame.iloc[:0]

    # Correct DataFrame metadata: the partition itself is returned.
    assert check_meta(frame, empty) is frame

    # Correct Series metadata: the partition itself is returned.
    cat_col = frame['e']
    assert check_meta(cat_col, empty['e']) is cat_col

    # numeric_equal=True lets the int64/uint64 columns match float/int meta.
    int_col = frame['d']
    uint_col = frame['f']
    numeric_cases = [
        (int_col, empty['d'].astype('f8')),
        (uint_col, empty['f'].astype('f8')),
        (uint_col, empty['f'].astype('i8')),
    ]
    for part, loosened in numeric_cases:
        assert check_meta(part, loosened, numeric_equal=True) is part

    # Series metadata error (strict numeric comparison).
    series_msg = ('Metadata mismatch found.\n'
                  '\n'
                  'Partition type: `pandas.core.series.Series`\n'
                  '+----------+---------+\n'
                  '|          | dtype   |\n'
                  '+----------+---------+\n'
                  '| Found    | int64   |\n'
                  '| Expected | float64 |\n'
                  '+----------+---------+')
    with pytest.raises(ValueError) as series_err:
        check_meta(int_col, empty['d'].astype('f8'), numeric_equal=False)
    assert str(series_err.value) == series_msg

    # DataFrame metadata error: dtype change on 'a', 'c' missing from the
    # partition, 'e' missing from the metadata.
    recast = empty.astype({'a': 'category', 'd': 'f8'})
    wrong_meta = recast[['a', 'b', 'c', 'd']]
    partition = frame[['a', 'b', 'd', 'e']]
    frame_msg = ('Metadata mismatch found in `from_delayed`.\n'
                 '\n'
                 'Partition type: `pandas.core.frame.DataFrame`\n'
                 '+--------+----------+----------+\n'
                 '| Column | Found    | Expected |\n'
                 '+--------+----------+----------+\n'
                 '| a      | object   | category |\n'
                 '| c      | -        | float64  |\n'
                 '| e      | category | -        |\n'
                 '+--------+----------+----------+')
    with pytest.raises(ValueError) as frame_err:
        check_meta(partition, wrong_meta, funcname='from_delayed')
    assert str(frame_err.value) == frame_msg
Esempio n. 4
0
def test_check_meta():
    """Verify check_meta identity passthrough and its error tables.

    This variant expects the short partition type names (`Series`,
    `DataFrame`) in the error messages.
    """
    data = {
        'a': ['x', 'y', 'z'],
        'b': [True, False, True],
        'c': [1, 2.5, 3.5],
        'd': [1, 2, 3],
        'e': pd.Categorical(['x', 'y', 'z']),
        'f': pd.Series([1, 2, 3], dtype=np.uint64),
    }
    frame = pd.DataFrame(data)
    # Zero-row slice used as the metadata.
    empty = frame.iloc[:0]

    # Matching DataFrame: same object comes back.
    assert check_meta(frame, empty) is frame

    # Matching Series: same object comes back.
    cat_col = frame['e']
    assert check_meta(cat_col, empty['e']) is cat_col

    # numeric_equal=True accepts int64/uint64 against float or int metadata.
    int_col = frame['d']
    uint_col = frame['f']
    as_float_d = empty['d'].astype('f8')
    as_float_f = empty['f'].astype('f8')
    as_int_f = empty['f'].astype('i8')
    assert check_meta(int_col, as_float_d, numeric_equal=True) is int_col
    assert check_meta(uint_col, as_float_f, numeric_equal=True) is uint_col
    assert check_meta(uint_col, as_int_f, numeric_equal=True) is uint_col

    # Series mismatch once numeric_equal is switched off.
    series_msg = (
        'Metadata mismatch found.\n'
        '\n'
        'Partition type: `Series`\n'
        '+----------+---------+\n'
        '|          | dtype   |\n'
        '+----------+---------+\n'
        '| Found    | int64   |\n'
        '| Expected | float64 |\n'
        '+----------+---------+'
    )
    with pytest.raises(ValueError) as series_err:
        check_meta(int_col, empty['d'].astype('f8'), numeric_equal=False)
    assert str(series_err.value) == series_msg

    # DataFrame mismatch: 'a' differs in dtype, 'c' is only in the metadata,
    # 'e' is only in the partition.
    recast = empty.astype({'a': 'category', 'd': 'f8'})
    wrong_meta = recast[['a', 'b', 'c', 'd']]
    partition = frame[['a', 'b', 'd', 'e']]
    frame_msg = (
        'Metadata mismatch found in `from_delayed`.\n'
        '\n'
        'Partition type: `DataFrame`\n'
        '+--------+----------+----------+\n'
        '| Column | Found    | Expected |\n'
        '+--------+----------+----------+\n'
        '| a      | object   | category |\n'
        '| c      | -        | float64  |\n'
        '| e      | category | -        |\n'
        '+--------+----------+----------+'
    )
    with pytest.raises(ValueError) as frame_err:
        check_meta(partition, wrong_meta, funcname='from_delayed')
    assert str(frame_err.value) == frame_msg
Esempio n. 5
0
def test_check_meta():
    """Verify check_meta passthrough and version-dependent error output.

    The expected DataFrame mismatch table is chosen by comparing
    PANDAS_VERSION with '0.21.0': the newer branch spells out the full
    CategoricalDtype repr, the older one shows plain `category`.
    """
    frame = pd.DataFrame({
        'a': ['x', 'y', 'z'],
        'b': [True, False, True],
        'c': [1, 2.5, 3.5],
        'd': [1, 2, 3],
        'e': pd.Categorical(['x', 'y', 'z']),
    })
    # Zero-row slice used as the metadata.
    empty = frame.iloc[:0]

    # Matching DataFrame metadata: the partition is returned by identity.
    assert check_meta(frame, empty) is frame

    # Matching Series metadata: the partition is returned by identity.
    cat_col = frame['e']
    assert check_meta(cat_col, empty['e']) is cat_col

    # numeric_equal=True accepts the int64 column against float64 metadata.
    int_col = frame['d']
    assert check_meta(int_col, empty['d'].astype('f8'), numeric_equal=True) is int_col

    # Series mismatch once numeric_equal is switched off.
    series_msg = ('Metadata mismatch found.\n'
                  '\n'
                  'Partition type: `Series`\n'
                  '+----------+---------+\n'
                  '|          | dtype   |\n'
                  '+----------+---------+\n'
                  '| Found    | int64   |\n'
                  '| Expected | float64 |\n'
                  '+----------+---------+')
    with pytest.raises(ValueError) as series_err:
        check_meta(int_col, empty['d'].astype('f8'), numeric_equal=False)
    assert str(series_err.value) == series_msg

    # DataFrame mismatch: 'a' differs in dtype, 'c' is only in the metadata,
    # 'e' is only in the partition.
    recast = empty.astype({'a': 'category', 'd': 'f8'})
    wrong_meta = recast[['a', 'b', 'c', 'd']]
    partition = frame[['a', 'b', 'd', 'e']]
    with pytest.raises(ValueError) as frame_err:
        check_meta(partition, wrong_meta, funcname='from_delayed')

    verbose_msg = (
        'Metadata mismatch found in `from_delayed`.\n'
        '\n'
        'Partition type: `DataFrame`\n'
        '+--------+-------------------------------------------------------------+------------------------------------------------+\n'  # noqa
        '| Column | Found                                                       | Expected                                       |\n'  # noqa
        '+--------+-------------------------------------------------------------+------------------------------------------------+\n'  # noqa
        '| a      | object                                                      | CategoricalDtype(categories=[], ordered=False) |\n'  # noqa
        '| c      | -                                                           | float64                                        |\n'  # noqa
        "| e      | CategoricalDtype(categories=['x', 'y', 'z'], ordered=False) | -                                              |\n"  # noqa
        '+--------+-------------------------------------------------------------+------------------------------------------------+'  # noqa
    )
    legacy_msg = ('Metadata mismatch found in `from_delayed`.\n'
                  '\n'
                  'Partition type: `DataFrame`\n'
                  '+--------+----------+----------+\n'
                  '| Column | Found    | Expected |\n'
                  '+--------+----------+----------+\n'
                  '| a      | object   | category |\n'
                  '| c      | -        | float64  |\n'
                  '| e      | category | -        |\n'
                  '+--------+----------+----------+')
    # NOTE(review): if PANDAS_VERSION is a plain str this comparison is
    # lexicographic rather than version-aware - confirm it is a version object.
    frame_msg = verbose_msg if PANDAS_VERSION >= '0.21.0' else legacy_msg
    assert str(frame_err.value) == frame_msg