def _update_factory(cls, _):
    """
    Select the factory matching the current execution and prepare it.

    The factory is looked up on ``factories`` under the name
    ``get_current_execution() + "Factory"``. When the lookup succeeds the
    factory is stored on the class and its ``prepare()`` is called. When it
    fails, a stub produced by ``StubFactory.set_failing_name`` is stored
    instead, but only if ``IsExperimental.get()`` is truthy.

    Parameters
    ----------
    _ : object
        Ignored. Accepted only for compatibility with callers that pass
        an argument; it does not affect the result.

    Raises
    ------
    FactoryNotFoundError
        If the factory is missing and either its name is
        ``"ExperimentalOmnisciOnRayFactory"`` or experimental mode is off.
    """
    factory_name = get_current_execution() + "Factory"
    try:
        cls.__factory = getattr(factories, factory_name)
    except AttributeError:
        if factory_name == "ExperimentalOmnisciOnRayFactory":
            raise FactoryNotFoundError(
                "OmniSci storage format no longer needs Ray engine; "
                "please specify MODIN_ENGINE='native'"
            )

        if IsExperimental.get():
            # Missing factories are tolerated in experimental mode only:
            # store a stub that remembers which factory could not be found.
            cls.__factory = StubFactory.set_failing_name(factory_name)
        else:
            if hasattr(factories, "Experimental" + factory_name):
                # The factory exists, but only under its experimental name.
                template = (
                    "{0} on {1} is only accessible through the experimental API.\nRun "
                    "`import modin.experimental.pandas as pd` to use {0} on {1}."
                )
            else:
                template = (
                    "Cannot find a factory for partition '{}' and execution engine '{}'. "
                    "Potential reason might be incorrect environment variable value for "
                    f"{StorageFormat.varname} or {Engine.varname}"
                )
            raise FactoryNotFoundError(
                template.format(StorageFormat.get(), Engine.get())
            )
    else:
        cls.__factory.prepare()
def pytest_runtest_call(item):
    """
    Turn ``xfail_executions`` / ``skip_executions`` markers into real markers.

    For every ``<name>_executions`` marker found on `item` (where ``<name>``
    is ``xfail`` or ``skip``), a ``pytest.mark.<name>`` marker is added to the
    item. Its condition is true when the current execution is one of the
    executions listed in the custom marker's first positional argument.

    Parameters
    ----------
    item : object
        The test item being run; must provide ``iter_markers`` and
        ``add_marker``.
    """
    custom_markers = ["xfail", "skip"]

    # Dynamically add the built-in markers that the custom ones stand for.
    for custom_marker in custom_markers:
        for marker in item.iter_markers(name=f"{custom_marker}_executions"):
            executions = marker.args[0]
            if not isinstance(executions, list):
                # A single execution name may be given without a list.
                executions = [executions]

            current_execution = get_current_execution()

            # Work on a copy: popping "reason" from ``marker.kwargs`` itself
            # would strip it from the marker object, which may be shared by
            # other test items (e.g. parametrized cases of one function), so
            # those items would silently lose their custom reason.
            forwarded_kwargs = dict(marker.kwargs)
            reason = forwarded_kwargs.pop("reason", "")

            item.add_marker(
                getattr(pytest.mark, custom_marker)(
                    condition=current_execution in executions,
                    reason=f"Execution {current_execution} does not pass this test. {reason}",
                    **forwarded_kwargs,
                )
            )
def test_copy(data):
    """Check that ``copy()`` returns a distinct frame that compares equal to its source."""
    source_df = pd.DataFrame(data)
    # Never read afterwards; it is kept so the parametrization does not need
    # confusing list-comprehension logic in ``pytest.mark.parametrize``.
    pandas_df = pandas.DataFrame(data)  # noqa F841

    copied_df = source_df.copy()
    assert copied_df is not source_df

    if get_current_execution() != "BaseOnPython":
        # Compare the partition arrays held by the two frames.
        copied_partitions = copied_df._query_compiler._modin_frame._partitions
        source_partitions = source_df._query_compiler._modin_frame._partitions
        assert np.array_equal(copied_partitions, source_partitions)

    assert copied_df is not source_df
    df_equals(copied_df, source_df)

    # Shallow copy: after overwriting a column on the source, the two frames
    # are still expected to compare equal.
    source_df = pd.DataFrame(data)
    shallow_df = source_df.copy(False)

    first_column = source_df.columns[0]
    source_df[first_column] = 0
    df_equals(source_df, shallow_df)
) for i in range(10) ] pandas_df = pd.concat(pandas_dfs) # Indexes get messed up when concatting so we reset both. pandas_df = pandas_df.reset_index(drop=True) modin_df = modin_df.reset_index(drop=True) df_equals(modin_df, pandas_df) test_default_to_pickle_filename = "test_default_to_pickle.pkl" @pytest.mark.skipif( get_current_execution() != "ExperimentalPandasOnRay", reason=f"Execution {get_current_execution()} isn't supported.", ) @pytest.mark.parametrize( "storage_options", [{ "anon": False }, { "anon": True }, { "key": "123", "secret": "123" }, None], ) def test_read_multiple_csv_s3_storage_opts(storage_options): path = "s3://modin-datasets/testing/multiple_csv/"
md_df.index = index md_df.columns = columns pd_df = md_df._to_pandas() for axis in [0, 1]: assert md_df.axes[axis].equals( pd_df.axes[axis]), f"Indices at axis {axis} are different!" assert md_df.axes[axis].equal_levels( pd_df.axes[axis] ), f"Levels of indices at axis {axis} are different!" @pytest.mark.skipif( get_current_execution() != "BaseOnPython", reason="This test make sense only on BaseOnPython execution.", ) @pytest.mark.parametrize( "func, regex", [ (lambda df: df.mean(level=0), r"DataFrame\.mean"), (lambda df: df + df, r"DataFrame\.add"), (lambda df: df.index, r"DataFrame\.get_axis\(0\)"), ( lambda df: df.drop(columns="col1").squeeze().repeat(2), r"Series\.repeat", ), (lambda df: df.groupby("col1").prod(), r"GroupBy\.prod"), (lambda df: df.rolling(1).count(), r"Rolling\.count"), ],
def test_append(data):
    """
    Compare ``DataFrame.append`` between Modin and pandas for several inputs.

    Every scenario runs the pandas call first. If pandas raises, Modin must
    raise the same exception type; otherwise both results must be equal.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    def check_append(make_other, adjust_modin_result=None, **append_kwargs):
        # ``make_other`` receives the frame being appended to, so the argument
        # is built lazily from the matching library's frame at the point of
        # the call.
        try:
            expected = pandas_df.append(make_other(pandas_df), **append_kwargs)
        except Exception as pandas_error:
            with pytest.raises(type(pandas_error)):
                modin_df.append(make_other(modin_df), **append_kwargs)
            return

        actual = modin_df.append(make_other(modin_df), **append_kwargs)
        if adjust_modin_result is not None:
            actual = adjust_modin_result(actual)
        df_equals(actual, expected)

    def sort_columns_like_pandas(result):
        # Pandas has a bug where sort=False is ignored
        # (https://github.com/pandas-dev/pandas/issues/35092), while Modin
        # now does the right thing, so the Modin result is sorted manually
        # as a workaround. Once the pandas bug is fixed and Modin upgrades
        # to that pandas release, this sort will make the test fail and the
        # reordering below should be deleted.
        if get_current_execution() != "BaseOnPython":
            assert list(result.columns) == list(modin_df.columns) + [0]
            result = result[[0] + sorted(modin_df.columns)]
        return result

    # Appending a dict, with and without ignoring the index.
    row_to_append = {"append_a": 2, "append_b": 1000}
    for ignore_index in (True, False):
        check_append(lambda _frame: row_to_append, ignore_index=ignore_index)

    # Appending the last row.
    check_append(lambda frame: frame.iloc[-1])

    # Appending the last row converted to a plain list.
    check_append(
        lambda frame: list(frame.iloc[-1]),
        adjust_modin_result=sort_columns_like_pandas,
    )

    # Appending whole frames, as a list of frames and as a single frame.
    for verify_integrity in (True, False):
        check_append(
            lambda frame: [frame, frame], verify_integrity=verify_integrity
        )
        check_append(lambda frame: frame, verify_integrity=verify_integrity)