def ray_df_equals_pandas(ray_df, pandas_df):
    """Assert that the frame under test matches the expected pandas frame.

    The check succeeds when the converted frame equals ``pandas_df`` exactly,
    when both frames contain only NaN values, or when the converted frame
    equals ``pandas_df`` after its columns are selected in ``pandas_df``'s
    column order.

    Args:
        ray_df: Frame under test; must be a ``pd.DataFrame``.
        pandas_df: Expected pandas frame.

    Raises:
        AssertionError: If ``ray_df`` has the wrong type or no check succeeds.
    """
    assert isinstance(ray_df, pd.DataFrame)
    if to_pandas(ray_df).equals(pandas_df):
        return
    if all(ray_df.isna().all()) and all(pandas_df.isna().all()):
        return
    # Order may not match here, but pandas behavior can change, so we will be
    # consistent ourselves in keeping the columns in the order they were in
    # before the groupby.
    reordered = to_pandas(ray_df)[list(pandas_df.columns)]
    assert reordered.equals(pandas_df)
def modin_df_almost_equals_pandas(modin_df, pandas_df):
    """Assert that a frame under test matches a pandas object within tolerance.

    The assertion passes when any of the following holds:

    * the converted object equals ``pandas_df`` exactly;
    * the largest absolute element-wise difference is below ``0.0001``;
    * every value in both objects is NaN.

    Args:
        modin_df: Object under test; converted with ``to_pandas``.
        pandas_df: pandas object holding the expected values.

    Raises:
        AssertionError: If none of the conditions above hold.
    """
    converted = to_pandas(modin_df)
    # Exact equality needs no arithmetic, so check it before subtracting.
    if converted.equals(pandas_df):
        return

    # Compare magnitudes: a signed maximum would let arbitrarily large
    # negative deviations slip under the tolerance.
    diff_max = (converted - pandas_df).abs().max()
    if isinstance(diff_max, pandas.Series):
        # DataFrame input: reduce the per-column maxima to a single scalar.
        diff_max = diff_max.max()

    assert diff_max < 0.0001 or (
        all(modin_df.isna().all()) and all(pandas_df.isna().all())
    )
def ray_df_almost_equals_pandas(ray_df, pandas_df):
    """Assert that a frame under test matches a pandas frame within tolerance.

    The assertion passes when the converted frame equals ``pandas_df``
    exactly, when the largest absolute element-wise difference is below
    ``0.0001``, or when every value in both frames is NaN.

    Args:
        ray_df: Frame under test; converted with ``to_pandas``.
        pandas_df: pandas frame holding the expected values.

    Raises:
        AssertionError: If none of the conditions above hold.
    """
    converted = to_pandas(ray_df)
    # Exact equality needs no arithmetic, so check it before subtracting.
    if converted.equals(pandas_df):
        return

    # Compare magnitudes: a signed maximum would let arbitrarily large
    # negative deviations slip under the tolerance.
    diff_max = (converted - pandas_df).abs().max().max()

    assert diff_max < 0.0001 or (
        all(ray_df.isna().all()) and all(pandas_df.isna().all())
    )
def ray_df_equals_pandas(ray_df, pandas_df):
    """Assert that the frame under test matches the expected pandas frame.

    The primary check succeeds when the converted frame equals ``pandas_df``
    exactly, when both frames contain only NaN values, or when the converted
    frame equals ``pandas_df`` after selecting ``pandas_df``'s columns in
    order. If a ``KeyError`` is raised during that check, the converted frame
    is compared against ``pandas_df`` with the column named after
    ``ray_df.index.name`` dropped.

    Args:
        ray_df: Frame under test; must be a ``pd.DataFrame``.
        pandas_df: Expected pandas frame.

    Raises:
        AssertionError: If ``ray_df`` has the wrong type or the frames differ.
    """
    assert isinstance(ray_df, pd.DataFrame)
    # Order may not match here, but pandas behavior can change, so we will be
    # consistent ourselves in keeping the columns in the order they were in
    # before the groupby.
    try:
        matches = to_pandas(ray_df).equals(pandas_df)
        if not matches:
            matches = all(ray_df.isna().all()) and all(pandas_df.isna().all())
        if not matches:
            reordered = to_pandas(ray_df)[list(pandas_df.columns)]
            matches = reordered.equals(pandas_df)
        assert matches
    # Pandas does not seem to be consistent with the way that it handles
    # as_index. Because the behavior is determined to be non-deterministic we
    # will at least check that everything else matches if we drop that column
    # from the pandas side.
    except KeyError:
        trimmed = pandas_df.drop(columns=[ray_df.index.name])
        assert to_pandas(ray_df).equals(trimmed)
def modin_df_equals_pandas(modin_df, pandas_df):
    """Return whether the frame under test equals a pandas frame.

    Both frames are sorted by index before comparison. When the
    ``MODIN_BACKEND`` environment variable (default ``"Pandas"``) is
    ``pyarrow`` (case-insensitive) and the dtypes differ, ``pandas_df`` is
    cast to the converted frame's dtypes before comparing.

    Args:
        modin_df: Frame under test; converted with ``to_pandas``.
        pandas_df: Expected pandas frame.

    Returns:
        The result of ``DataFrame.equals`` on the index-sorted frames.
    """
    actual = to_pandas(modin_df).sort_index()
    expected = pandas_df.sort_index()
    backend = os.environ.get("MODIN_BACKEND", "Pandas").lower()
    if backend == "pyarrow" and not actual.dtypes.equals(expected.dtypes):
        return expected.astype(actual.dtypes).equals(actual)
    return actual.equals(expected)
def test_where():
    """Check that ``where`` on the frame under test agrees with pandas.

    Covers a frame-valued ``other``, a row passed with ``axis=1``, a column
    passed with ``axis=0``, and a scalar ``other``.
    """

    def assert_same(modin_result, pandas_result):
        # Element-wise comparison, reduced per column and then overall.
        assert all((to_pandas(modin_result) == pandas_result).all())

    frame_data = random_state.randn(100, 10)
    pandas_df = pandas.DataFrame(frame_data, columns=list("abcdefghij"))
    modin_df = pd.DataFrame(frame_data, columns=list("abcdefghij"))
    pandas_cond_df = pandas_df % 5 < 2
    modin_cond_df = modin_df % 5 < 2

    # Frame-valued replacement.
    expected = pandas_df.where(pandas_cond_df, -pandas_df)
    actual = modin_df.where(modin_cond_df, -modin_df)
    assert_same(actual, expected)

    # Row-valued replacement aligned along the columns.
    row = pandas_df.loc[3]
    expected = pandas_df.where(pandas_cond_df, row, axis=1)
    actual = modin_df.where(modin_cond_df, row, axis=1)
    assert_same(actual, expected)

    # Column-valued replacement aligned along the index.
    column = pandas_df["e"]
    expected = pandas_df.where(pandas_cond_df, column, axis=0)
    actual = modin_df.where(modin_cond_df, column, axis=0)
    assert_same(actual, expected)

    # Scalar replacement.
    expected = pandas_df.where(pandas_df < 2, True)
    actual = modin_df.where(modin_df < 2, True)
    assert_same(actual, expected)
def modin_df_almost_equals_pandas(modin_df, pandas_df):
    """Assert that a frame under test matches a pandas object within tolerance.

    ``df_categories_equals`` is first called on the converted object and
    ``pandas_df``. Categorical columns are then excluded from both sides
    (when ``select_dtypes`` is available) and the assertion passes when any
    of the following holds:

    * the remaining data is exactly equal;
    * the largest absolute element-wise difference is below ``0.0001``;
    * every remaining value on both sides is NaN.

    Args:
        modin_df: Object under test; must provide ``_to_pandas``.
        pandas_df: pandas object holding the expected values.

    Raises:
        AssertionError: If none of the conditions above hold.
    """
    df_categories_equals(modin_df._to_pandas(), pandas_df)

    modin_df = to_pandas(modin_df)
    # Series objects have no ``select_dtypes``; only frames get filtered.
    if hasattr(modin_df, "select_dtypes"):
        modin_df = modin_df.select_dtypes(exclude=["category"])
    if hasattr(pandas_df, "select_dtypes"):
        pandas_df = pandas_df.select_dtypes(exclude=["category"])

    # Exact equality needs no arithmetic, so check it before subtracting.
    if modin_df.equals(pandas_df):
        return

    # Compare magnitudes: a signed maximum would let arbitrarily large
    # negative deviations slip under the tolerance.
    diff_max = (modin_df - pandas_df).abs().max()
    if isinstance(diff_max, pandas.Series):
        # DataFrame input: reduce the per-column maxima to a single scalar.
        diff_max = diff_max.max()

    assert diff_max < 0.0001 or (
        all(modin_df.isna().all()) and all(pandas_df.isna().all())
    )
def test_to_dict():
    """Check that ``to_dict`` gives the same result before and after conversion."""
    modin_df = create_test_modin_dataframe()
    actual = modin_df.to_dict()
    expected = to_pandas(modin_df).to_dict()
    assert actual == expected
def ray_df_equals_pandas(ray_df, pandas_df):
    """Return whether the frame under test equals a pandas frame.

    Both frames are sorted by index before being compared with ``equals``.

    Args:
        ray_df: Frame under test; converted with ``to_pandas``.
        pandas_df: Expected pandas frame.

    Returns:
        The result of ``equals`` on the index-sorted frames.
    """
    actual = to_pandas(ray_df).sort_index()
    expected = pandas_df.sort_index()
    return actual.equals(expected)
def test_to_latex():
    """Check that ``to_latex`` gives the same result before and after conversion."""
    ray_df = create_test_ray_dataframe()
    actual = ray_df.to_latex()
    expected = to_pandas(ray_df).to_latex()
    assert actual == expected
def ray_df_equals(ray_df1, ray_df2):
    """Assert that two frames are equal once both are converted with ``to_pandas``.

    Args:
        ray_df1: First frame to compare.
        ray_df2: Second frame to compare.

    Raises:
        AssertionError: If the converted frames are not equal.
    """
    left = to_pandas(ray_df1)
    right = to_pandas(ray_df2)
    assert left.equals(right)
def df_equals(df1, df2):
    """Assert that df1 and df2 are equal.

    Args:
        df1: (pandas or modin DataFrame or series) dataframe to test if equal.
            May also be a list/tuple of such objects, an index, a groupby
            object, a ``PandasArray``, or any other comparable value.
        df2: (pandas or modin DataFrame or series) dataframe to test if equal.

    Returns:
        None. Equality is signalled by returning normally.

    Raises:
        AssertionError: If the two objects are not considered equal.
    """
    types_for_almost_equals = (
        pandas.core.indexes.range.RangeIndex,
        pandas.core.indexes.base.Index,
    )

    # Gets AttributeError if modin's groupby object is not imported like this
    from modin.pandas.groupby import DataFrameGroupBy

    groupby_types = (pandas.core.groupby.DataFrameGroupBy, DataFrameGroupBy)

    # The typing behavior of how pandas treats its index is not consistent when
    # the length of the DataFrame or Series is 0, so we just verify that the
    # contents are the same.
    if (
        hasattr(df1, "index")
        and hasattr(df2, "index")
        and len(df1) == 0
        and len(df2) == 0
    ):
        if type(df1).__name__ == type(df2).__name__:
            if (
                hasattr(df1, "name")
                and hasattr(df2, "name")
                and df1.name == df2.name
            ):
                return
            if (
                hasattr(df1, "columns")
                and hasattr(df2, "columns")
                and df1.columns.equals(df2.columns)
            ):
                return
        assert False

    if isinstance(df1, (list, tuple)) and all(
        isinstance(d, (pd.DataFrame, pd.Series, pandas.DataFrame, pandas.Series))
        for d in df1
    ):
        assert isinstance(df2, type(df1)), "Different type of collection"
        assert len(df1) == len(df2), "Different length result"
        # Compare eagerly: a lazily evaluated generator would never run the
        # element comparisons unless the caller happened to consume it.
        for d1, d2 in zip(df1, df2):
            df_equals(d1, d2)
        return

    # Convert to pandas
    if isinstance(df1, (pd.DataFrame, pd.Series)):
        df1 = to_pandas(df1)
    if isinstance(df2, (pd.DataFrame, pd.Series)):
        df2 = to_pandas(df2)

    if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):
        # Fail loudly on emptiness mismatches; callers do not inspect the
        # return value, so returning False would let them pass silently.
        assert (
            df1.empty == df2.empty
        ), "One of the passed frames is empty when the other is not"
        assert not df1.empty or type(df1) is type(
            df2
        ), "Empty frames have different types"

        assert_frame_equal(
            df1,
            df2,
            check_dtype=False,
            check_datetimelike_compat=True,
            check_index_type=False,
            check_column_type=False,
            check_categorical=False,
        )
        df_categories_equals(df1, df2)
    elif isinstance(df1, types_for_almost_equals) and isinstance(
        df2, types_for_almost_equals
    ):
        assert_almost_equal(df1, df2, check_dtype=False)
    elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series):
        assert_almost_equal(df1, df2, check_dtype=False, check_series_type=False)
    elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types):
        for g1, g2 in zip(df1, df2):
            # Each item is indexed as (group key, group data).
            assert g1[0] == g2[0]
            df_equals(g1[1], g2[1])
    elif isinstance(df1, pandas.core.arrays.numpy_.PandasArray):
        assert isinstance(df2, pandas.core.arrays.numpy_.PandasArray)
        assert df1 == df2
    else:
        if df1 != df2:
            np.testing.assert_almost_equal(df1, df2)
def modin_df_equals_pandas(modin_df, pandas_df):
    """Return whether the frame under test equals a pandas frame.

    Both frames are sorted by index before being compared with ``equals``.

    Args:
        modin_df: Frame under test; converted with ``to_pandas``.
        pandas_df: Expected pandas frame.

    Returns:
        The result of ``equals`` on the index-sorted frames.
    """
    actual = to_pandas(modin_df).sort_index()
    expected = pandas_df.sort_index()
    return actual.equals(expected)
def ray_df_equals_pandas(ray_df, pandas_df):
    """Assert that the frame under test matches the expected pandas frame.

    The check succeeds when the converted frame equals ``pandas_df`` exactly
    or when both frames contain only NaN values.

    Args:
        ray_df: Frame under test; must be a ``pd.DataFrame``.
        pandas_df: Expected pandas frame.

    Raises:
        AssertionError: If ``ray_df`` has the wrong type or the frames differ.
    """
    assert isinstance(ray_df, pd.DataFrame)
    if to_pandas(ray_df).equals(pandas_df):
        return
    # Fall back to treating two all-NaN frames as equal.
    assert all(ray_df.isna().all()) and all(pandas_df.isna().all())
def ray_df_almost_equals_pandas(ray_df, pandas_df):
    """Assert that a frame under test matches a pandas frame within tolerance.

    The assertion passes when the converted frame equals ``pandas_df``
    exactly or when the largest absolute element-wise difference is below
    ``0.0001``.

    Args:
        ray_df: Frame under test; must be a ``pd.DataFrame``.
        pandas_df: pandas frame holding the expected values.

    Raises:
        AssertionError: If ``ray_df`` has the wrong type or the frames differ
            by more than the tolerance.
    """
    assert isinstance(ray_df, pd.DataFrame)

    converted = to_pandas(ray_df)
    # Exact equality needs no arithmetic, so check it before subtracting.
    if converted.equals(pandas_df):
        return

    # Compare magnitudes: a signed maximum would let arbitrarily large
    # negative deviations slip under the tolerance.
    diff_max = (converted - pandas_df).abs().max().max()
    assert diff_max < 0.0001
def test_to_gbq():
    """Check that the test frame equals its pandas counterpart after conversion."""
    modin_df = create_test_ray_dataframe()
    pandas_df = create_test_pandas_dataframe()
    # Because we default to pandas, we can just test the equality of the two
    # frames.
    converted = to_pandas(modin_df)
    assert converted.equals(pandas_df)
def df_equals(df1, df2):
    """Assert that df1 and df2 are equal.

    Args:
        df1: (pandas or modin DataFrame or series) dataframe to test if equal.
            May also be an index, a groupby object, or any other comparable
            value.
        df2: (pandas or modin DataFrame or series) dataframe to test if equal.

    Returns:
        None. Equality is signalled by returning normally.

    Raises:
        AssertionError: If the two objects are not considered equal.
    """
    types_for_almost_equals = (
        pandas.core.indexes.range.RangeIndex,
        pandas.core.indexes.base.Index,
    )

    # Gets AttributeError if modin's groupby object is not imported like this
    from modin.pandas.groupby import DataFrameGroupBy

    groupby_types = (pandas.core.groupby.DataFrameGroupBy, DataFrameGroupBy)

    # Convert to pandas
    if isinstance(df1, pd.DataFrame):
        df1 = to_pandas(df1)
    if isinstance(df2, pd.DataFrame):
        df2 = to_pandas(df2)

    if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):
        # Fail loudly on emptiness mismatches; callers do not inspect the
        # return value, so returning False would let them pass silently.
        assert (
            df1.empty == df2.empty
        ), "One of the passed frames is empty when the other is not"
        assert not df1.empty or type(df1) is type(
            df2
        ), "Empty frames have different types"

        # First compare with columns sorted so that column order is ignored;
        # if that attempt raises for any reason, compare the frames as given.
        try:
            assert_frame_equal(
                df1.sort_index(axis=1),
                df2.sort_index(axis=1),
                check_dtype=False,
                check_datetimelike_compat=True,
                check_index_type=False,
            )
        except Exception:
            assert_frame_equal(
                df1,
                df2,
                check_dtype=False,
                check_datetimelike_compat=True,
                check_index_type=False,
            )
    elif isinstance(df1, types_for_almost_equals) and isinstance(
        df2, types_for_almost_equals
    ):
        assert_almost_equal(df1, df2, check_dtype=False)
    elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series):
        assert_almost_equal(df1, df2, check_dtype=False, check_series_type=False)
    elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types):
        for g1, g2 in zip(df1, df2):
            # Each item is indexed as (group key, group data).
            assert g1[0] == g2[0]
            df_equals(g1[1], g2[1])
    else:
        assert df1 == df2
def test_boxplot(data):
    """Check that ``boxplot`` gives the same result before and after conversion.

    Args:
        data: Input used to construct the frames under test.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)  # noqa F841
    actual = modin_df.boxplot()
    expected = to_pandas(modin_df).boxplot()
    assert actual == expected
("corr", None), ("expanding", None), ("corrwith", lambda df: { "other": df }), ("explode", lambda df: { "column": df.columns[0] }), ("ewm", lambda df: { "com": 0.5 }), ("from_dict", lambda df: { "data": None }), ("from_records", lambda df: { "data": to_pandas(df) }), ("hist", lambda df: { "column": "int_col" }), ("infer_objects", None), ("interpolate", None), ("lookup", lambda df: { "row_labels": [0], "col_labels": ["int_col"] }), ("mask", lambda df: { "cond": df != 0 }), ("pct_change", None), ("__getstate__", None),
def test_to_latex():
    """Check that ``to_latex`` gives the same result before and after conversion."""
    modin_df = create_test_ray_dataframe()
    actual = modin_df.to_latex()
    expected = to_pandas(modin_df).to_latex()
    assert actual == expected
def ray_df_equals_pandas(ray_df, pandas_df):
    """Assert that the frame under test equals the expected pandas frame.

    Args:
        ray_df: Frame under test; must be a ``pd.DataFrame``.
        pandas_df: Expected pandas frame.

    Raises:
        AssertionError: If ``ray_df`` has the wrong type or the frames differ.
    """
    assert isinstance(ray_df, pd.DataFrame)
    converted = to_pandas(ray_df)
    assert converted.equals(pandas_df)