def test_quantile(request, data, q): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) if not name_contains(request.node.name, no_numeric_dfs): df_equals(modin_df.quantile(q), pandas_df.quantile(q)) df_equals(modin_df.quantile(q, axis=1), pandas_df.quantile(q, axis=1)) try: pandas_result = pandas_df.quantile(q, axis=1, numeric_only=False) except Exception as e: with pytest.raises(type(e)): modin_df.quantile(q, axis=1, numeric_only=False) else: modin_result = modin_df.quantile(q, axis=1, numeric_only=False) df_equals(modin_result, pandas_result) else: with pytest.raises(ValueError): modin_df.quantile(q) if not name_contains(request.node.name, no_numeric_dfs): df_equals(modin_df.T.quantile(q), pandas_df.T.quantile(q)) df_equals(modin_df.T.quantile(q, axis=1), pandas_df.T.quantile(q, axis=1)) try: pandas_result = pandas_df.T.quantile(q, axis=1, numeric_only=False) except Exception as e: with pytest.raises(type(e)): modin_df.T.quantile(q, axis=1, numeric_only=False) else: modin_result = modin_df.T.quantile(q, axis=1, numeric_only=False) df_equals(modin_result, pandas_result) else: with pytest.raises(ValueError): modin_df.T.quantile(q)
def test_cumsum(request, data, axis, skipna): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) # pandas exhibits weird behavior for this case # Remove this case when we can pull the error messages from backend if name_contains(request.node.name, ["datetime_timedelta_data" ]) and (axis == 0 or axis == "rows"): with pytest.raises(TypeError): modin_df.cumsum(axis=axis, skipna=skipna) else: try: pandas_result = pandas_df.cumsum(axis=axis, skipna=skipna) except Exception as e: with pytest.raises(type(e)): modin_df.cumsum(axis=axis, skipna=skipna) else: modin_result = modin_df.cumsum(axis=axis, skipna=skipna) df_equals(modin_result, pandas_result) if name_contains(request.node.name, ["datetime_timedelta_data" ]) and (axis == 0 or axis == "rows"): with pytest.raises(TypeError): modin_df.T.cumsum(axis=axis, skipna=skipna) else: try: pandas_result = pandas_df.T.cumsum(axis=axis, skipna=skipna) except Exception as e: with pytest.raises(type(e)): modin_df.T.cumsum(axis=axis, skipna=skipna) else: modin_result = modin_df.T.cumsum(axis=axis, skipna=skipna) df_equals(modin_result, pandas_result)
def test_agg_numeric(request, data, axis, func): if name_contains(request.node.name, numeric_agg_funcs) and name_contains( request.node.name, numeric_dfs ): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) try: pandas_result = pandas_df.agg(func, axis) except Exception as e: with pytest.raises(type(e)): modin_df.agg(func, axis) else: modin_result = modin_df.agg(func, axis) df_equals(modin_result, pandas_result)
def test_applymap_numeric(request, data, testfunc): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) if name_contains(request.node.name, numeric_dfs): try: pandas_result = pandas_df.applymap(testfunc) except Exception as e: with pytest.raises(type(e)): modin_df.applymap(testfunc) else: modin_result = modin_df.applymap(testfunc) df_equals(modin_result, pandas_result)
def test_iloc(request, data): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) if not name_contains(request.node.name, ["empty_data"]): # Scaler np.testing.assert_equal(modin_df.iloc[0, 1], pandas_df.iloc[0, 1]) # Series df_equals(modin_df.iloc[0], pandas_df.iloc[0]) df_equals(modin_df.iloc[1:, 0], pandas_df.iloc[1:, 0]) df_equals(modin_df.iloc[1:2, 0], pandas_df.iloc[1:2, 0]) # DataFrame df_equals(modin_df.iloc[[1, 2]], pandas_df.iloc[[1, 2]]) # See issue #80 # df_equals(modin_df.iloc[[1, 2], [1, 0]], pandas_df.iloc[[1, 2], [1, 0]]) df_equals(modin_df.iloc[1:2, 0:2], pandas_df.iloc[1:2, 0:2]) # Issue #43 modin_df.iloc[0:3, :] # Write Item modin_df.iloc[[1, 2]] = 42 pandas_df.iloc[[1, 2]] = 42 df_equals(modin_df, pandas_df) modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) modin_df.iloc[0] = modin_df.iloc[1] pandas_df.iloc[0] = pandas_df.iloc[1] df_equals(modin_df, pandas_df) modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) modin_df.iloc[:, 0] = modin_df.iloc[:, 1] pandas_df.iloc[:, 0] = pandas_df.iloc[:, 1] df_equals(modin_df, pandas_df) # From issue #1775 df_equals( modin_df.iloc[lambda df: df.index.get_indexer_for(df.index[:5])], pandas_df.iloc[lambda df: df.index.get_indexer_for(df.index[:5])], ) else: with pytest.raises(IndexError): modin_df.iloc[0, 1]
def test_apply_numeric(request, data, func, axis): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) if name_contains(request.node.name, numeric_dfs): try: pandas_result = pandas_df.apply(func, axis) except Exception as e: with pytest.raises(type(e)): modin_df.apply(func, axis) else: modin_result = modin_df.apply(func, axis) df_equals(modin_result, pandas_result) if "empty_data" not in request.node.name: key = modin_df.columns[0] modin_result = modin_df.apply(lambda df: df.drop(key), axis=1) pandas_result = pandas_df.apply(lambda df: df.drop(key), axis=1) df_equals(modin_result, pandas_result)
def test_plot(request, data): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) if name_contains(request.node.name, numeric_dfs): # We have to test this way because equality in plots means same object. zipped_plot_lines = zip(modin_df.plot().lines, pandas_df.plot().lines) for left, right in zipped_plot_lines: if isinstance(left.get_xdata(), np.ma.core.MaskedArray) and isinstance( right.get_xdata(), np.ma.core.MaskedArray): assert all((left.get_xdata() == right.get_xdata()).data) else: assert np.array_equal(left.get_xdata(), right.get_xdata()) if isinstance(left.get_ydata(), np.ma.core.MaskedArray) and isinstance( right.get_ydata(), np.ma.core.MaskedArray): assert all((left.get_ydata() == right.get_ydata()).data) else: assert np.array_equal(left.get_xdata(), right.get_xdata())
def test_clip(request, data, axis): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) if name_contains(request.node.name, numeric_dfs): ind_len = ( len(modin_df.index) if not pandas.DataFrame()._get_axis_number(axis) else len(modin_df.columns) ) # set bounds lower, upper = np.sort(random_state.random_integers(RAND_LOW, RAND_HIGH, 2)) lower_list = random_state.random_integers(RAND_LOW, RAND_HIGH, ind_len) upper_list = random_state.random_integers(RAND_LOW, RAND_HIGH, ind_len) # test only upper scalar bound modin_result = modin_df.clip(None, upper, axis=axis) pandas_result = pandas_df.clip(None, upper, axis=axis) df_equals(modin_result, pandas_result) # test lower and upper scalar bound modin_result = modin_df.clip(lower, upper, axis=axis) pandas_result = pandas_df.clip(lower, upper, axis=axis) df_equals(modin_result, pandas_result) # test lower and upper list bound on each column modin_result = modin_df.clip(lower_list, upper_list, axis=axis) pandas_result = pandas_df.clip(lower_list, upper_list, axis=axis) df_equals(modin_result, pandas_result) # test only upper list bound on each column modin_result = modin_df.clip(np.nan, upper_list, axis=axis) pandas_result = pandas_df.clip(np.nan, upper_list, axis=axis) df_equals(modin_result, pandas_result) with pytest.raises(ValueError): modin_df.clip(lower=[1, 2, 3], axis=None)
def test_sort_values(request, data, axis, ascending, na_position): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) if "empty_data" not in request.node.name and ( (axis == 0 or axis == "over rows") or name_contains(request.node.name, numeric_dfs)): index = modin_df.index if axis == 1 or axis == "columns" else modin_df.columns key = index[0] modin_result = modin_df.sort_values( key, axis=axis, ascending=ascending, na_position=na_position, inplace=False, ) pandas_result = pandas_df.sort_values( key, axis=axis, ascending=ascending, na_position=na_position, inplace=False, ) df_equals(modin_result, pandas_result) modin_df_cp = modin_df.copy() pandas_df_cp = pandas_df.copy() modin_df_cp.sort_values( key, axis=axis, ascending=ascending, na_position=na_position, inplace=True, ) pandas_df_cp.sort_values( key, axis=axis, ascending=ascending, na_position=na_position, inplace=True, ) df_equals(modin_df_cp, pandas_df_cp) keys = [key, index[-1]] modin_result = modin_df.sort_values( keys, axis=axis, ascending=ascending, na_position=na_position, inplace=False, ) pandas_result = pandas_df.sort_values( keys, axis=axis, ascending=ascending, na_position=na_position, inplace=False, ) df_equals(modin_result, pandas_result) modin_df_cp = modin_df.copy() pandas_df_cp = pandas_df.copy() modin_df_cp.sort_values( keys, axis=axis, ascending=ascending, na_position=na_position, inplace=True, ) pandas_df_cp.sort_values( keys, axis=axis, ascending=ascending, na_position=na_position, inplace=True, ) df_equals(modin_df_cp, pandas_df_cp)