Beispiel #1
0
def ray_df_equals_pandas(ray_df, pandas_df):
    """Assert that ``ray_df`` carries the same data as ``pandas_df``.

    The check succeeds as soon as one of these holds:

    * the frame converted with ``to_pandas`` equals ``pandas_df``;
    * every column of both frames consists only of missing values;
    * the converted frame equals ``pandas_df`` after its columns are selected
      in the order of ``pandas_df.columns``.

    Args:
        ray_df: Frame under test; must be an instance of ``pd.DataFrame``.
        pandas_df: Reference frame to compare against.

    Raises:
        AssertionError: If ``ray_df`` is not a ``pd.DataFrame`` or none of the
            conditions above holds.
    """
    assert isinstance(ray_df, pd.DataFrame)
    converted = to_pandas(ray_df)
    if converted.equals(pandas_df):
        return
    if all(ray_df.isna().all()) and all(pandas_df.isna().all()):
        return
    # Column order may differ after a groupby and pandas behavior can change,
    # so compare again with the columns arranged as in the reference frame.
    reordered = converted[list(pandas_df.columns)]
    assert reordered.equals(pandas_df)
Beispiel #2
0
def modin_df_almost_equals_pandas(modin_df, pandas_df):
    """Assert that ``modin_df`` matches ``pandas_df`` within a small tolerance.

    The check passes when the converted object equals ``pandas_df`` exactly,
    when the largest absolute element-wise difference is below ``0.0001``, or
    when every column of both objects consists only of missing values.

    Args:
        modin_df: Object under test; converted with ``to_pandas``.
        pandas_df: Reference pandas object to compare against.

    Raises:
        AssertionError: If none of the conditions above holds.
    """
    converted = to_pandas(modin_df)
    # Use the absolute difference: with a signed maximum, arbitrarily large
    # negative deviations would still fall under the tolerance.
    diff_max = (converted - pandas_df).abs().max()
    # For frames the first max() is per column; reduce once more to a scalar.
    if isinstance(diff_max, pandas.Series):
        diff_max = diff_max.max()
    assert (converted.equals(pandas_df) or diff_max < 0.0001
            or (all(modin_df.isna().all()) and all(pandas_df.isna().all())))
Beispiel #3
0
def ray_df_almost_equals_pandas(ray_df, pandas_df):
    """Assert that ``ray_df`` matches ``pandas_df`` within a small tolerance.

    The check passes when the converted frame equals ``pandas_df`` exactly,
    when the largest absolute element-wise difference is below ``0.0001``, or
    when every column of both frames consists only of missing values.

    Args:
        ray_df: Frame under test; converted with ``to_pandas``.
        pandas_df: Reference pandas frame to compare against.

    Raises:
        AssertionError: If none of the conditions above holds.
    """
    converted = to_pandas(ray_df)
    # Use the absolute difference: with a signed maximum, arbitrarily large
    # negative deviations would still fall under the tolerance.
    diff_max = (converted - pandas_df).abs().max().max()
    assert (
        converted.equals(pandas_df)
        or diff_max < 0.0001
        or (all(ray_df.isna().all()) and all(pandas_df.isna().all()))
    )
Beispiel #4
0
def ray_df_equals_pandas(ray_df, pandas_df):
    """Assert that ``ray_df`` carries the same data as ``pandas_df``.

    The primary check succeeds when the converted frame equals ``pandas_df``,
    when every column of both frames holds only missing values, or when the
    converted frame equals ``pandas_df`` after selecting its columns in the
    order of ``pandas_df.columns``.

    If that check raises ``KeyError``, the converted frame is instead compared
    with ``pandas_df`` after dropping the column named ``ray_df.index.name``.

    Args:
        ray_df: Frame under test; must be an instance of ``pd.DataFrame``.
        pandas_df: Reference frame to compare against.

    Raises:
        AssertionError: If ``ray_df`` is not a ``pd.DataFrame`` or the
            applicable comparison fails.
    """
    assert isinstance(ray_df, pd.DataFrame)
    converted = to_pandas(ray_df)
    try:
        if converted.equals(pandas_df):
            return
        if all(ray_df.isna().all()) and all(pandas_df.isna().all()):
            return
        # Column order may differ, but pandas behavior can change, so the
        # columns are compared in the order they had before the groupby.
        assert converted[list(pandas_df.columns)].equals(pandas_df)
    except KeyError:
        # Pandas is not consistent in how it handles as_index. Since that
        # behavior is treated as non-deterministic, at least verify that
        # everything else matches once that column is dropped on the pandas
        # side.
        trimmed = pandas_df.drop(columns=[ray_df.index.name])
        assert converted.equals(trimmed)
Beispiel #5
0
def modin_df_equals_pandas(modin_df, pandas_df):
    """Return whether ``modin_df`` equals ``pandas_df`` after sorting by index.

    When the ``MODIN_BACKEND`` environment variable (default ``"Pandas"``) is
    ``"pyarrow"`` in any letter case and the dtypes of the two frames differ,
    the reference frame is first cast to the dtypes of the converted frame.

    Args:
        modin_df: Frame under test; converted with ``to_pandas``.
        pandas_df: Reference pandas frame.

    Returns:
        The boolean result of the pandas ``equals`` comparison.
    """
    left = to_pandas(modin_df).sort_index()
    right = pandas_df.sort_index()
    backend = os.environ.get("MODIN_BACKEND", "Pandas").lower()
    if backend == "pyarrow" and not left.dtypes.equals(right.dtypes):
        right_cast = right.astype(left.dtypes)
        return right_cast.equals(left)
    return left.equals(right)
Beispiel #6
0
def test_where():
    """Check that ``where`` on the ``pd`` frame matches pandas element-wise.

    Covers a frame-valued ``other``, a row ``other`` along ``axis=1``, a
    column ``other`` along ``axis=0`` and a scalar ``other``.
    """
    values = random_state.randn(100, 10)
    pandas_df = pandas.DataFrame(values, columns=list("abcdefghij"))
    modin_df = pd.DataFrame(values, columns=list("abcdefghij"))
    pandas_mask = pandas_df % 5 < 2
    modin_mask = modin_df % 5 < 2

    def check(expected, actual):
        # Every element of the converted result must equal the pandas result.
        assert all((to_pandas(actual) == expected).all())

    # Frame-valued replacement.
    check(
        pandas_df.where(pandas_mask, -pandas_df),
        modin_df.where(modin_mask, -modin_df),
    )

    # Replacement taken from a single row, aligned along the columns.
    row = pandas_df.loc[3]
    check(
        pandas_df.where(pandas_mask, row, axis=1),
        modin_df.where(modin_mask, row, axis=1),
    )

    # Replacement taken from a single column, aligned along the index.
    column = pandas_df["e"]
    check(
        pandas_df.where(pandas_mask, column, axis=0),
        modin_df.where(modin_mask, column, axis=0),
    )

    # Scalar replacement with a condition computed inline.
    check(
        pandas_df.where(pandas_df < 2, True),
        modin_df.where(modin_df < 2, True),
    )
Beispiel #7
0
def modin_df_almost_equals_pandas(modin_df, pandas_df):
    """Assert that ``modin_df`` matches ``pandas_df`` within a small tolerance.

    ``df_categories_equals`` is first called on the pandas form of
    ``modin_df`` and on ``pandas_df`` (presumably to compare categorical
    columns; confirm against that helper). Columns of dtype ``category`` are
    then excluded from both sides where ``select_dtypes`` is available.

    The check passes when the remaining data is exactly equal, when the
    largest absolute element-wise difference is below ``0.0001``, or when
    every column of both objects consists only of missing values.

    Args:
        modin_df: Object under test; must provide ``_to_pandas`` and be
            accepted by ``to_pandas``.
        pandas_df: Reference pandas object to compare against.

    Raises:
        AssertionError: If none of the conditions above holds.
    """
    df_categories_equals(modin_df._to_pandas(), pandas_df)

    modin_df = to_pandas(modin_df)

    # Only objects that offer select_dtypes can have category columns removed.
    if hasattr(modin_df, "select_dtypes"):
        modin_df = modin_df.select_dtypes(exclude=["category"])
    if hasattr(pandas_df, "select_dtypes"):
        pandas_df = pandas_df.select_dtypes(exclude=["category"])

    # Use the absolute difference: with a signed maximum, arbitrarily large
    # negative deviations would still fall under the tolerance.
    difference = (modin_df - pandas_df).abs()
    diff_max = difference.max()
    # For frames the first max() is per column; reduce once more to a scalar.
    if isinstance(diff_max, pandas.Series):
        diff_max = diff_max.max()
    assert (modin_df.equals(pandas_df) or diff_max < 0.0001
            or (all(modin_df.isna().all()) and all(pandas_df.isna().all())))
Beispiel #8
0
def test_to_dict():
    """Check that ``to_dict`` on the test frame matches its pandas conversion."""
    frame = create_test_modin_dataframe()
    actual = frame.to_dict()
    expected = to_pandas(frame).to_dict()
    assert actual == expected
Beispiel #9
0
def ray_df_equals_pandas(ray_df, pandas_df):
    """Return whether ``ray_df`` equals ``pandas_df`` after sorting by index.

    Args:
        ray_df: Frame under test; converted with ``to_pandas``.
        pandas_df: Reference pandas frame.

    Returns:
        The boolean result of the pandas ``equals`` comparison.
    """
    sorted_converted = to_pandas(ray_df).sort_index()
    sorted_reference = pandas_df.sort_index()
    return sorted_converted.equals(sorted_reference)
Beispiel #10
0
def test_to_latex():
    """Check that ``to_latex`` on the test frame matches its pandas conversion."""
    frame = create_test_ray_dataframe()
    actual = frame.to_latex()
    expected = to_pandas(frame).to_latex()
    assert actual == expected
Beispiel #11
0
def ray_df_equals(ray_df1, ray_df2):
    """Assert that two frames are equal once both are converted to pandas.

    Args:
        ray_df1: First frame; converted with ``to_pandas``.
        ray_df2: Second frame; converted with ``to_pandas``.

    Raises:
        AssertionError: If the converted frames are not equal.
    """
    first = to_pandas(ray_df1)
    second = to_pandas(ray_df2)
    assert first.equals(second)
Beispiel #12
0
def df_equals(df1, df2):
    """Assert that df1 and df2 are equal.

    Args:
        df1: (pandas or modin DataFrame or series) dataframe to test if equal.
            A list or tuple of such objects is compared element by element.
        df2: (pandas or modin DataFrame or series) dataframe to test if equal.

    Returns:
        None for a single pair of objects. When df1 is a list or tuple of
        frames/series, a list holding the result of each pairwise comparison.

    Raises:
        AssertionError: If the two objects are not considered equal.
    """
    types_for_almost_equals = (
        pandas.core.indexes.range.RangeIndex,
        pandas.core.indexes.base.Index,
    )

    # An AttributeError is raised if modin's groupby object is not imported
    # like this.
    from modin.pandas.groupby import DataFrameGroupBy

    groupby_types = (pandas.core.groupby.DataFrameGroupBy, DataFrameGroupBy)

    # The typing behavior of how pandas treats its index is not consistent when the
    # length of the DataFrame or Series is 0, so we just verify that the contents are
    # the same.
    if (hasattr(df1, "index") and hasattr(df2, "index") and len(df1) == 0
            and len(df2) == 0):
        if type(df1).__name__ == type(df2).__name__:
            if hasattr(df1, "name") and hasattr(
                    df2, "name") and df1.name == df2.name:
                return
            if (hasattr(df1, "columns") and hasattr(df2, "columns")
                    and df1.columns.equals(df2.columns)):
                return
        assert False, "Empty objects differ in type, name or columns"

    if isinstance(df1, (list, tuple)) and all(
            isinstance(d, (pd.DataFrame, pd.Series, pandas.DataFrame,
                           pandas.Series)) for d in df1):
        assert isinstance(df2, type(df1)), "Different type of collection"
        assert len(df1) == len(df2), "Different length result"
        # Compare eagerly. A lazy generator expression here would be discarded
        # by callers without ever running the element comparisons. A list is
        # returned so that callers iterating over the result keep working.
        return [df_equals(d1, d2) for d1, d2 in zip(df1, df2)]

    # Convert to pandas
    if isinstance(df1, (pd.DataFrame, pd.Series)):
        df1 = to_pandas(df1)
    if isinstance(df2, (pd.DataFrame, pd.Series)):
        df2 = to_pandas(df2)

    if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):
        # Fail loudly on an emptiness or type mismatch: returning False here
        # would be ignored by callers and let the mismatch pass silently.
        assert df1.empty == df2.empty, (
            "One of the passed frames is empty, when other isn't")
        assert not (df1.empty and type(df1) is not type(df2)), (
            "Empty frames have different types")
        assert_frame_equal(
            df1,
            df2,
            check_dtype=False,
            check_datetimelike_compat=True,
            check_index_type=False,
            check_column_type=False,
            check_categorical=False,
        )
        df_categories_equals(df1, df2)
    elif isinstance(df1, types_for_almost_equals) and isinstance(
            df2, types_for_almost_equals):
        assert_almost_equal(df1, df2, check_dtype=False)
    elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series):
        assert_almost_equal(df1,
                            df2,
                            check_dtype=False,
                            check_series_type=False)
    elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types):
        # Each groupby item is indexed as (key, group): keys must match and
        # the groups are compared recursively.
        for g1, g2 in zip(df1, df2):
            assert g1[0] == g2[0]
            df_equals(g1[1], g2[1])
    # NOTE(review): this branch is unreachable, because every pair of Series
    # is already handled by the Series branch above. Kept for reference.
    elif (isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series)
          and df1.empty and df2.empty):
        assert all(df1.index == df2.index)
        assert df1.dtypes == df2.dtypes
    elif isinstance(df1, pandas.core.arrays.numpy_.PandasArray):
        assert isinstance(df2, pandas.core.arrays.numpy_.PandasArray)
        assert df1 == df2
    else:
        # Only fall back to the approximate comparison when the plain
        # comparison reports a difference.
        if df1 != df2:
            np.testing.assert_almost_equal(df1, df2)
Beispiel #13
0
def modin_df_equals_pandas(modin_df, pandas_df):
    """Return whether ``modin_df`` equals ``pandas_df`` after sorting by index.

    Args:
        modin_df: Frame under test; converted with ``to_pandas``.
        pandas_df: Reference pandas frame.

    Returns:
        The boolean result of the pandas ``equals`` comparison.
    """
    sorted_converted = to_pandas(modin_df).sort_index()
    sorted_reference = pandas_df.sort_index()
    return sorted_converted.equals(sorted_reference)
Beispiel #14
0
def ray_df_equals_pandas(ray_df, pandas_df):
    """Assert that ``ray_df`` carries the same data as ``pandas_df``.

    The check passes when the converted frame equals ``pandas_df`` or when
    every column of both frames consists only of missing values.

    Args:
        ray_df: Frame under test; must be an instance of ``pd.DataFrame``.
        pandas_df: Reference frame to compare against.

    Raises:
        AssertionError: If ``ray_df`` is not a ``pd.DataFrame`` or neither
            condition holds.
    """
    assert isinstance(ray_df, pd.DataFrame)
    if to_pandas(ray_df).equals(pandas_df):
        return
    ray_all_missing = all(ray_df.isna().all())
    assert ray_all_missing and all(pandas_df.isna().all())
Beispiel #15
0
def ray_df_almost_equals_pandas(ray_df, pandas_df):
    """Assert that ``ray_df`` matches ``pandas_df`` within a small tolerance.

    The check passes when the converted frame equals ``pandas_df`` exactly or
    when the largest absolute element-wise difference is below ``0.0001``.

    Args:
        ray_df: Frame under test; must be an instance of ``pd.DataFrame``.
        pandas_df: Reference pandas frame to compare against.

    Raises:
        AssertionError: If ``ray_df`` is not a ``pd.DataFrame`` or the frames
            differ by more than the tolerance.
    """
    assert isinstance(ray_df, pd.DataFrame)
    converted = to_pandas(ray_df)
    # Use the absolute difference: with a signed maximum, arbitrarily large
    # negative deviations would still fall under the tolerance.
    diff_max = (converted - pandas_df).abs().max().max()
    assert converted.equals(pandas_df) or diff_max < 0.0001
Beispiel #16
0
def test_to_gbq():
    """Check that the test frame converts to the reference pandas frame."""
    frame = create_test_ray_dataframe()
    reference = create_test_pandas_dataframe()
    # Because we default to pandas, it is enough to test the equality of the
    # two frames.
    converted = to_pandas(frame)
    assert converted.equals(reference)
Beispiel #17
0
def df_equals(df1, df2):
    """Assert that df1 and df2 are equal.

    Args:
        df1: (pandas or modin DataFrame or series) dataframe to test if equal.
        df2: (pandas or modin DataFrame or series) dataframe to test if equal.

    Returns:
        None.

    Raises:
        AssertionError: If the two objects are not considered equal.
    """
    types_for_almost_equals = (
        pandas.core.indexes.range.RangeIndex,
        pandas.core.indexes.base.Index,
    )

    # An AttributeError is raised if modin's groupby object is not imported
    # like this.
    from modin.pandas.groupby import DataFrameGroupBy

    groupby_types = (pandas.core.groupby.DataFrameGroupBy, DataFrameGroupBy)

    # Convert to pandas
    if isinstance(df1, pd.DataFrame):
        df1 = to_pandas(df1)
    if isinstance(df2, pd.DataFrame):
        df2 = to_pandas(df2)

    if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):
        # Fail loudly on an emptiness or type mismatch: returning False here
        # would be ignored by callers and let the mismatch pass silently.
        assert (
            df1.empty == df2.empty
        ), "One of the passed frames is empty, when other isn't"
        assert not (
            df1.empty and type(df1) is not type(df2)
        ), "Empty frames have different types"
        try:
            # First compare with the columns sorted, so that a pure difference
            # in column order does not fail the check.
            assert_frame_equal(
                df1.sort_index(axis=1),
                df2.sort_index(axis=1),
                check_dtype=False,
                check_datetimelike_compat=True,
                check_index_type=False,
            )
        except Exception:
            # Any failure above (including one raised while sorting) falls
            # back to comparing the frames as they are; a real mismatch is
            # reported by this second call.
            assert_frame_equal(
                df1,
                df2,
                check_dtype=False,
                check_datetimelike_compat=True,
                check_index_type=False,
            )
    elif isinstance(df1, types_for_almost_equals) and isinstance(
        df2, types_for_almost_equals
    ):
        assert_almost_equal(df1, df2, check_dtype=False)
    elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series):
        assert_almost_equal(df1, df2, check_dtype=False, check_series_type=False)
    elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types):
        # Each groupby item is indexed as (key, group): keys must match and
        # the groups are compared recursively.
        for g1, g2 in zip(df1, df2):
            assert g1[0] == g2[0]
            df_equals(g1[1], g2[1])
    # NOTE(review): this branch is unreachable, because every pair of Series
    # is already handled by the Series branch above. Kept for reference.
    elif (
        isinstance(df1, pandas.Series)
        and isinstance(df2, pandas.Series)
        and df1.empty
        and df2.empty
    ):
        assert all(df1.index == df2.index)
        assert df1.dtypes == df2.dtypes
    else:
        assert df1 == df2
Beispiel #18
0
def test_boxplot(data):
    """Check that ``boxplot`` on the ``pd`` frame equals that of its pandas form.

    Args:
        data: Input used to construct both frames.
    """
    modin_frame = pd.DataFrame(data)
    # The pandas frame is still constructed from the same data, although the
    # assertion below does not use it.
    pandas.DataFrame(data)

    modin_plot = modin_frame.boxplot()
    converted_plot = to_pandas(modin_frame).boxplot()
    assert modin_plot == converted_plot
Beispiel #19
0
 ("corr", None),
 ("expanding", None),
 ("corrwith", lambda df: {
     "other": df
 }),
 ("explode", lambda df: {
     "column": df.columns[0]
 }),
 ("ewm", lambda df: {
     "com": 0.5
 }),
 ("from_dict", lambda df: {
     "data": None
 }),
 ("from_records", lambda df: {
     "data": to_pandas(df)
 }),
 ("hist", lambda df: {
     "column": "int_col"
 }),
 ("infer_objects", None),
 ("interpolate", None),
 ("lookup", lambda df: {
     "row_labels": [0],
     "col_labels": ["int_col"]
 }),
 ("mask", lambda df: {
     "cond": df != 0
 }),
 ("pct_change", None),
 ("__getstate__", None),
Beispiel #20
0
def test_to_latex():
    """Check that ``to_latex`` on the test frame matches its pandas conversion."""
    frame = create_test_ray_dataframe()
    actual = frame.to_latex()
    expected = to_pandas(frame).to_latex()
    assert actual == expected
Beispiel #21
0
def ray_df_equals_pandas(ray_df, pandas_df):
    """Assert that ``ray_df`` converts to a frame equal to ``pandas_df``.

    Args:
        ray_df: Frame under test; must be an instance of ``pd.DataFrame``.
        pandas_df: Reference frame to compare against.

    Raises:
        AssertionError: If ``ray_df`` is not a ``pd.DataFrame`` or the
            converted frame differs from ``pandas_df``.
    """
    assert isinstance(ray_df, pd.DataFrame)
    converted = to_pandas(ray_df)
    assert converted.equals(pandas_df)