@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @pytest.mark.parametrize("key", indices_values, ids=indices_keys) def test_get(data, key): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) df_equals(modin_df.get(key), pandas_df.get(key)) df_equals(modin_df.get(key, default="default"), pandas_df.get(key, default="default")) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @pytest.mark.parametrize("dummy_na", bool_arg_values, ids=arg_keys("dummy_na", bool_arg_keys)) @pytest.mark.parametrize("drop_first", bool_arg_values, ids=arg_keys("drop_first", bool_arg_keys)) def test_get_dummies(request, data, dummy_na, drop_first): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) try: pandas_result = pandas.get_dummies(pandas_df, dummy_na=dummy_na, drop_first=drop_first) except Exception as e: with pytest.raises(type(e)): pd.get_dummies(modin_df, dummy_na=dummy_na, drop_first=drop_first) else:
generate_multiindex, test_data_diff_dtype, ) from modin.config import NPartitions NPartitions.put(4) # Force matplotlib to not use any Xwindows backend. matplotlib.use("Agg") @pytest.mark.parametrize("method", ["all", "any"]) @pytest.mark.parametrize("is_transposed", [False, True]) @pytest.mark.parametrize("skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys)) @pytest.mark.parametrize("axis", axis_values, ids=axis_keys) @pytest.mark.parametrize("data", [test_data["float_nan_data"]]) def test_all_any(data, axis, skipna, is_transposed, method): eval_general( *create_test_dfs(data), lambda df: getattr((df.T if is_transposed else df), method) (axis=axis, skipna=skipna, bool_only=None), ) @pytest.mark.parametrize("method", ["all", "any"]) @pytest.mark.parametrize("bool_only", bool_arg_values, ids=arg_keys("bool_only", bool_arg_keys)) def test_all_any_specific(bool_only, method):
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("key", indices_values, ids=indices_keys)
def test_get(data, key):
    """Compare ``get`` on the ``pd`` and ``pandas`` frames built from ``data``.

    The lookup is checked twice: once with no fallback value and once with
    ``default="default"``.
    """
    modin_frame, pandas_frame = pd.DataFrame(data), pandas.DataFrame(data)

    # An empty keyword set reproduces the plain ``get(key)`` call.
    for get_kwargs in ({}, {"default": "default"}):
        df_equals(
            modin_frame.get(key, **get_kwargs),
            pandas_frame.get(key, **get_kwargs),
        )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "dummy_na", bool_arg_values, ids=arg_keys("dummy_na", bool_arg_keys)
)
@pytest.mark.parametrize(
    "drop_first", bool_arg_values, ids=arg_keys("drop_first", bool_arg_keys)
)
def test_get_dummies(request, data, dummy_na, drop_first):
    """Exercise ``get_dummies`` on both libraries with the same arguments.

    When the ``pandas`` call raises, the ``pd`` call is required to raise an
    exception of the same type.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Both libraries receive exactly the same keyword arguments.
    dummies_kwargs = {"dummy_na": dummy_na, "drop_first": drop_first}

    # NOTE(review): ``pandas_result`` is not used in the visible part of this
    # test; a comparison branch may lie beyond this excerpt -- confirm.
    try:
        pandas_result = pandas.get_dummies(pandas_df, **dummies_kwargs)
    except Exception as pandas_error:
        with pytest.raises(type(pandas_error)):
            pd.get_dummies(modin_df, **dummies_kwargs)
rows_number = len(next(iter( data.values()))) # length of the first data column level_0 = np.random.choice([0, 1, 2], rows_number) level_1 = np.random.choice([3, 4, 5], rows_number) index = pd.MultiIndex.from_arrays([level_0, level_1]) eval_general( pd.DataFrame(data, index=index), pandas.DataFrame(data, index=index), lambda df, *args, **kwargs: df.apply(func, *args, **kwargs), **func_kwargs, ) @pytest.mark.parametrize("column", ["A", ["A", "C"]], ids=arg_keys("column", ["A", ["A", "C"]])) @pytest.mark.parametrize("ignore_index", bool_arg_values, ids=arg_keys("ignore_index", bool_arg_keys)) def test_explode_single_partition(column, ignore_index): # This test data has two columns where some items are lists that # explode() should expand. In some rows, the columns have list-like # elements that must be expanded, and in others, they have empty lists # or items that aren't list-like at all. data = { "A": [[0, 1, 2], "foo", [], [3, 4]], "B": 1, "C": [["a", "b", "c"], np.nan, [], ["d", "e"]], } eval_general( *create_test_dfs(data),
df_equals( modin_df.asof(modin_where.values[0], subset=subset), pandas_df.asof(pandas_where.values[0], subset=subset), ) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) def test_first_valid_index(data): modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) assert modin_df.first_valid_index() == (pandas_df.first_valid_index()) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @pytest.mark.parametrize("n", int_arg_values, ids=arg_keys("n", int_arg_keys)) def test_head(data, n): # Test normal dataframe head modin_df = pd.DataFrame(data) pandas_df = pandas.DataFrame(data) df_equals(modin_df.head(n), pandas_df.head(n)) df_equals(modin_df.head(len(modin_df) + 1), pandas_df.head(len(pandas_df) + 1)) # Test head when we call it from a QueryCompilerView modin_result = modin_df.loc[:, ["col1", "col3", "col3"]].head(n) pandas_result = pandas_df.loc[:, ["col1", "col3", "col3"]].head(n) df_equals(modin_result, pandas_result) @pytest.mark.skip(reason="Defaulting to Pandas")
# Named Series promoted to DF s = pd.Series(frame_data2.get("col1")) with pytest.raises(ValueError): modin_df.merge(s) s = pd.Series(frame_data2.get("col1"), name="col1") df_equals(modin_df.merge(s), modin_df.merge(modin_df2[["col1"]])) with pytest.raises(TypeError): modin_df.merge("Non-valid type") @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("ascending", bool_arg_values, ids=arg_keys("ascending", bool_arg_keys)) @pytest.mark.parametrize("na_position", ["first", "last"], ids=["first", "last"]) def test_sort_index(axis, ascending, na_position): data = test_data["float_nan_data"] modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data) # Change index value so sorting will actually make a difference if axis == 0: length = len(modin_df.index) for df in [modin_df, pandas_df]: df.index = [(i - length / 2) % length for i in range(length)] # Add NaNs to sorted index for df in [modin_df, pandas_df]: sort_index = df.axes[axis]
create_test_dfs, generate_multiindex, test_data_diff_dtype, ) from modin.config import NPartitions NPartitions.put(4) # Force matplotlib to not use any Xwindows backend. matplotlib.use("Agg") @pytest.mark.parametrize("method", ["all", "any"]) @pytest.mark.parametrize("is_transposed", [False, True]) @pytest.mark.parametrize( "skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys) ) @pytest.mark.parametrize("axis", axis_values, ids=axis_keys) @pytest.mark.parametrize("data", [test_data["float_nan_data"]]) def test_all_any(data, axis, skipna, is_transposed, method): eval_general( *create_test_dfs(data), lambda df: getattr((df.T if is_transposed else df), method)( axis=axis, skipna=skipna, bool_only=None ), ) @pytest.mark.parametrize("method", ["all", "any"]) @pytest.mark.parametrize( "bool_only", bool_arg_values, ids=arg_keys("bool_only", bool_arg_keys)
# Named Series promoted to DF s = pd.Series(frame_data2.get("col1")) with pytest.raises(ValueError): modin_df.merge(s) s = pd.Series(frame_data2.get("col1"), name="col1") df_equals(modin_df.merge(s), modin_df.merge(modin_df2[["col1"]])) with pytest.raises(TypeError): modin_df.merge("Non-valid type") @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize( "ascending", bool_arg_values, ids=arg_keys("ascending", bool_arg_keys) ) @pytest.mark.parametrize("na_position", ["first", "last"], ids=["first", "last"]) def test_sort_index(axis, ascending, na_position): data = test_data["float_nan_data"] modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data) # Change index value so sorting will actually make a difference if axis == 0: length = len(modin_df.index) for df in [modin_df, pandas_df]: df.index = [(i - length / 2) % length for i in range(length)] # Add NaNs to sorted index for df in [modin_df, pandas_df]: sort_index = df.axes[axis]
int_arg_values, test_data, eval_general, create_test_dfs, test_data_diff_dtype, ) pd.DEFAULT_NPARTITIONS = 4 # Force matplotlib to not use any Xwindows backend. matplotlib.use("Agg") @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize( "skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys) ) @pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"]) def test_cumprod_cummin_cummax_cumsum(axis, skipna, method): eval_general( *create_test_dfs(test_data["float_nan_data"]), lambda df: getattr(df, method)(axis=axis, skipna=skipna), ) @pytest.mark.parametrize("axis", ["rows", "columns"]) @pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"]) def test_cumprod_cummin_cummax_cumsum_transposed(axis, method): eval_general( *create_test_dfs(test_data["int_data"]), lambda df: getattr(df.T, method)(axis=axis),
eval_general, create_test_dfs, test_data_diff_dtype, ) from modin.config import NPartitions NPartitions.put(4) # Force matplotlib to not use any Xwindows backend. matplotlib.use("Agg") @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys)) @pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"]) def test_cumprod_cummin_cummax_cumsum(axis, skipna, method): eval_general( *create_test_dfs(test_data["float_nan_data"]), lambda df: getattr(df, method)(axis=axis, skipna=skipna), ) @pytest.mark.parametrize("axis", ["rows", "columns"]) @pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"]) def test_cumprod_cummin_cummax_cumsum_transposed(axis, method): eval_general( *create_test_dfs(test_data["int_data"]), lambda df: getattr(df.T, method)(axis=axis), )