def test_from_partitions(axis, index, columns, row_lengths, column_widths):
    num_rows = 2**16
    num_cols = 2**8
    data = np.random.randint(0, 100, size=(num_rows, num_cols))
    df1, df2 = pandas.DataFrame(data), pandas.DataFrame(data)
    expected_df = pandas.concat([df1, df2], axis=1 if axis is None else axis)

    index = expected_df.index if index == "index" else None
    columns = expected_df.columns if columns == "columns" else None
    row_lengths = (
        None
        if row_lengths is None
        else [num_rows, num_rows]
        if axis == 0
        else [num_rows]
    )
    column_widths = (
        None
        if column_widths is None
        else [num_cols]
        if axis == 0
        else [num_cols, num_cols]
    )

    if Engine.get() == "Ray":
        if axis is None:
            futures = [[ray.put(df1), ray.put(df2)]]
        else:
            futures = [ray.put(df1), ray.put(df2)]
    if Engine.get() == "Dask":
        client = default_client()
        if axis is None:
            futures = [client.scatter([df1, df2], hash=False)]
        else:
            futures = client.scatter([df1, df2], hash=False)

    actual_df = from_partitions(
        futures,
        axis,
        index=index,
        columns=columns,
        row_lengths=row_lengths,
        column_widths=column_widths,
    )
    df_equals(expected_df, actual_df)
def test_unwrap_partitions(axis):
    data = np.random.randint(0, 100, size=(2**16, 2**8))
    df = pd.DataFrame(data)
    if axis is None:
        expected_partitions = df._query_compiler._modin_frame._partitions
        actual_partitions = np.array(unwrap_partitions(df, axis=axis))
        assert (
            expected_partitions.shape[0] == actual_partitions.shape[0]
            and expected_partitions.shape[1] == actual_partitions.shape[1]
        )
        for row_idx in range(expected_partitions.shape[0]):
            for col_idx in range(expected_partitions.shape[1]):
                if Engine.get() == "Ray":
                    assert (
                        expected_partitions[row_idx][col_idx].oid
                        == actual_partitions[row_idx][col_idx]
                    )
                if Engine.get() == "Dask":
                    assert (
                        expected_partitions[row_idx][col_idx].future
                        == actual_partitions[row_idx][col_idx]
                    )
    else:
        expected_axis_partitions = (
            df._query_compiler._modin_frame._partition_mgr_cls.axis_partition(
                df._query_compiler._modin_frame._partitions, axis ^ 1
            )
        )
        expected_axis_partitions = [
            axis_partition.force_materialization().unwrap(squeeze=True)
            for axis_partition in expected_axis_partitions
        ]
        actual_axis_partitions = unwrap_partitions(df, axis=axis)
        assert len(expected_axis_partitions) == len(actual_axis_partitions)
        for item_idx in range(len(expected_axis_partitions)):
            if Engine.get() in ["Ray", "Dask"]:
                df_equals(
                    get_func(expected_axis_partitions[item_idx]),
                    get_func(actual_axis_partitions[item_idx]),
                )
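# A minimal usage sketch of the public API exercised by the tests above:
# unwrap a Modin DataFrame into its engine-level partition futures, then
# rebuild a DataFrame from them. Assumes a distributed engine (Ray or Dask)
# is initialized; with axis=None unwrap_partitions returns the full 2-D
# structure of partition futures, with axis=0/1 it returns row/column parts.
import numpy as np

import modin.pandas as pd
from modin.distributed.dataframe.pandas import unwrap_partitions, from_partitions

df = pd.DataFrame(np.random.randint(0, 100, size=(256, 16)))
parts = unwrap_partitions(df, axis=None)  # 2-D list of futures (e.g. ray.ObjectRef)
df2 = from_partitions(parts, None)        # reassemble a Modin DataFrame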
def __update_engine(self, _):
    if Engine.get() in REMOTE_ENGINES:
        from modin.experimental.cloud import get_connection

        self.__swap_numpy(get_connection().modules["numpy"])
    else:
        self.__swap_numpy()
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    if Engine.get() == "Ray":
        filename = "test_from_sql_distributed.db"
        table = "test_from_sql_distributed"
        conn = make_sql_connection(filename, table)
        query = "select * from {0}".format(table)

        pandas_df = pandas.read_sql(query, conn)
        modin_df_from_query = pd.read_sql(
            query,
            conn,
            partition_column="col1",
            lower_bound=0,
            upper_bound=6,
            max_sessions=2,
        )
        modin_df_from_table = pd.read_sql(
            table,
            conn,
            partition_column="col1",
            lower_bound=0,
            upper_bound=6,
            max_sessions=2,
        )

        df_equals(modin_df_from_query, pandas_df)
        df_equals(modin_df_from_table, pandas_df)
def update_class(_):
    if Engine.get() in REMOTE_ENGINES:
        from . import rpyc_proxy

        result.__real_cls__ = getattr(rpyc_proxy, rpyc_wrapper_name)(result)
    else:
        result.__real_cls__ = result
def _read_sql(cls, **kwargs):
    if Engine.get() != "Ray":
        # These parameters are only used by the distributed (Ray) implementation:
        # warn if any of them was passed, then drop them before delegating to the
        # default reader.
        for param in ("partition_column", "lower_bound", "upper_bound", "max_sessions"):
            if kwargs.get(param) is not None:
                warnings.warn(
                    "Distributed read_sql() was only implemented for Ray engine."
                )
            kwargs.pop(param, None)
    return cls.io_cls.read_sql(**kwargs)
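# Hedged example of the experimental distributed read_sql() whose Ray-only
# parameters the wrapper above warns about and strips on other engines.
# The connection string, table, and column names are illustrative.
import modin.experimental.pandas as pd

df = pd.read_sql(
    "select * from some_table",
    "sqlite:///some.db",
    partition_column="col1",  # numeric column to split partitions on (Ray only)
    lower_bound=0,
    upper_bound=1000,
)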
def _update_factory(cls, _):
    """
    Update and prepare factory with a new one specified via Modin config.

    Parameters
    ----------
    _ : object
        This parameter serves the compatibility purpose.
        Does not affect the result.
    """
    factory_name = get_current_backend() + "Factory"
    try:
        cls.__factory = getattr(factories, factory_name)
    except AttributeError:
        if not IsExperimental.get():  # allow missing factories in experimental mode only
            if hasattr(factories, "Experimental" + factory_name):
                msg = (
                    "{0} on {1} is only accessible through the experimental API.\nRun "
                    "`import modin.experimental.pandas as pd` to use {0} on {1}."
                )
            else:
                msg = (
                    "Cannot find a factory for partition '{}' and execution engine '{}'. "
                    "Potential reason might be incorrect environment variable value for "
                    f"{Backend.varname} or {Engine.varname}"
                )
            raise FactoryNotFoundError(msg.format(Backend.get(), Engine.get()))
        cls.__factory = StubFactory.set_failing_name(factory_name)
    else:
        cls.__factory.prepare()
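# Illustration (not Modin source) of how the factory name above is composed:
# get_current_backend() joins the configured partition format and execution
# engine, so with Backend "Pandas" and Engine "Ray" the dispatcher resolves
# factories.PandasOnRayFactory; experimental factories carry an "Experimental"
# prefix, as the fallback branch above checks. A sketch based on the lookup
# logic shown here; helper internals may differ between Modin versions.
from modin.config import Backend, Engine

factory_name = f"{Backend.get()}On{Engine.get()}Factory"  # e.g. "PandasOnRayFactory"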
def test_from_partitions(axis):
    data = np.random.randint(0, 100, size=(2**16, 2**8))
    df1, df2 = pandas.DataFrame(data), pandas.DataFrame(data)
    expected_df = pandas.concat([df1, df2], axis=1 if axis is None else axis)
    if Engine.get() == "Ray":
        if axis is None:
            futures = [[ray.put(df1), ray.put(df2)]]
        else:
            futures = [ray.put(df1), ray.put(df2)]
    if Engine.get() == "Dask":
        client = get_client()
        if axis is None:
            futures = [client.scatter([df1, df2], hash=False)]
        else:
            futures = client.scatter([df1, df2], hash=False)
    actual_df = from_partitions(futures, axis)
    df_equals(expected_df, actual_df)
def train(
    params: Dict,
    dtrain: DMatrix,
    *args,
    evals=(),
    num_actors: Optional[int] = None,
    evals_result: Optional[Dict] = None,
    **kwargs,
):
    """
    Run distributed training of XGBoost model.

    During execution it evenly distributes `dtrain` between workers according
    to IP addresses of the partitions (if `dtrain` is not evenly distributed
    over the nodes, some partitions are re-distributed between them), runs
    `xgb.train` on each worker for its subset of `dtrain`, and reduces the
    training results of the workers using Rabit Context.

    Parameters
    ----------
    params : dict
        Booster params.
    dtrain : modin.experimental.xgboost.DMatrix
        Data to be trained against.
    *args : iterable
        Other parameters for `xgboost.train`.
    evals : list of pairs (modin.experimental.xgboost.DMatrix, str), default: empty
        List of validation sets for which metrics will be evaluated during
        training. Validation metrics will help us track the performance of
        the model.
    num_actors : int, optional
        Number of actors for training. If unspecified, this value will be
        computed automatically.
    evals_result : dict, optional
        Dict to store evaluation results in.
    **kwargs : dict
        Other parameters are the same as `xgboost.train`.

    Returns
    -------
    modin.experimental.xgboost.Booster
        A trained booster.
    """
    LOGGER.info("Training started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _train
    else:
        raise ValueError("Current version supports only Ray engine.")

    assert isinstance(
        dtrain, DMatrix
    ), f"Type of `dtrain` is {type(dtrain)}, but expected {DMatrix}."
    result = _train(dtrain, num_actors, params, *args, evals=evals, **kwargs)
    if isinstance(evals_result, dict):
        evals_result.update(result["history"])

    LOGGER.info("Training finished")
    return Booster(model_file=result["booster"])
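# Hedged usage sketch for the train() API documented above. Assumes Ray is
# initialized and modin.experimental.xgboost is available; the column names
# and hyperparameters are illustrative only.
import modin.pandas as pd
import modin.experimental.xgboost as xgb

X = pd.DataFrame({"f0": list(range(100)), "f1": list(range(100, 200))})
y = pd.DataFrame({"label": [i % 2 for i in range(100)]})
dtrain = xgb.DMatrix(X, y)

evals_result = {}
booster = xgb.train(
    {"objective": "binary:logistic"},   # Booster params
    dtrain,
    num_boost_round=10,                 # forwarded to xgboost.train via kwargs
    evals=[(dtrain, "train")],
    evals_result=evals_result,          # filled with result["history"]
)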
def train(
    params: Dict,
    dtrain: DMatrix,
    *args,
    evals=(),
    num_actors: Optional[int] = None,
    evals_result: Optional[Dict] = None,
    **kwargs,
):
    """
    Train XGBoost model.

    Parameters
    ----------
    params : dict
        Booster params.
    dtrain : DMatrix
        Data to be trained against.
    evals : list of pairs (DMatrix, string)
        List of validation sets for which metrics will be evaluated during
        training. Validation metrics will help us track the performance of
        the model.
    num_actors : int. Default is None
        Number of actors for training. If it's None, this value will be
        computed automatically.
    evals_result : dict. Default is None
        Dict to store evaluation results in.
    \\*\\*kwargs :
        Other parameters are the same as `xgboost.train`.

    Returns
    -------
    ``modin.experimental.xgboost.Booster``
        A trained booster.
    """
    LOGGER.info("Training started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _train
    else:
        raise ValueError("Current version supports only Ray engine.")

    assert isinstance(
        dtrain, DMatrix
    ), f"Type of `dtrain` is {type(dtrain)}, but expected {DMatrix}."
    result = _train(dtrain, num_actors, params, *args, evals=evals, **kwargs)
    if isinstance(evals_result, dict):
        evals_result.update(result["history"])

    LOGGER.info("Training finished")
    return Booster(model_file=result["booster"])
def predict(
    model,
    data: ModinDMatrix,
    nthread: Optional[int] = cpu_count(),
    evenly_data_distribution: Optional[bool] = True,
    **kwargs,
):
    """
    Run prediction with a trained booster.

    Parameters
    ----------
    model : A Booster or a dictionary returned by `modin.experimental.xgboost.train`
        The trained model.
    data : ModinDMatrix
        Input data used for prediction.
    nthread : int
        Number of threads to use on each node. Defaults to the number of
        threads on the master node.
    evenly_data_distribution : boolean, default True
        Whether to distribute partitions of `data` evenly between nodes.
        If `False`, data transfer between nodes is minimized, but the data
        may not be evenly distributed.

    Returns
    -------
    numpy.array
        Array with prediction results.
    """
    LOGGER.info("Prediction started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _predict
    else:
        raise ValueError("Current version supports only Ray engine.")

    if isinstance(model, xgb.Booster):
        booster = model
    elif isinstance(model, dict):
        booster = model["booster"]
    else:
        raise TypeError(
            f"Expected types for `model` are xgb.Booster or dict, but the presented type is {type(model)}"
        )
    result = _predict(booster, data, nthread, evenly_data_distribution, **kwargs)
    LOGGER.info("Prediction finished")

    return result
def train(
    params: Dict,
    dtrain: ModinDMatrix,
    *args,
    evals=(),
    nthread: Optional[int] = cpu_count(),
    **kwargs,
):
    """
    Train XGBoost model.

    Parameters
    ----------
    params : dict
        Booster params.
    dtrain : ModinDMatrix
        Data to be trained against.
    evals : list of pairs (ModinDMatrix, string)
        List of validation sets for which metrics will be evaluated during
        training. Validation metrics will help us track the performance of
        the model.
    nthread : int. Default is number of threads on master node
        Number of threads to use on each node.
    \\*\\*kwargs :
        Other parameters are the same as `xgboost.train` except for
        `evals_result`, which is returned as part of the function return value
        instead of being an argument.

    Returns
    -------
    dict
        A dictionary containing trained booster and evaluation history.
        `history` field is the same as `eval_result` from `xgboost.train`.

        .. code-block:: python

            {'booster': xgboost.Booster,
             'history': {'train': {'logloss': ['0.48253', '0.35953']},
                         'eval': {'logloss': ['0.480385', '0.357756']}}}
    """
    LOGGER.info("Training started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _train
    else:
        raise ValueError("Current version supports only Ray engine.")

    result = _train(dtrain, nthread, params, *args, evals=evals, **kwargs)
    LOGGER.info("Training finished")
    return result
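# Sketch of consuming the dict returned by this earlier train() variant
# (hedged: ModinDMatrix and the return layout are as documented above; the
# feature data is made up for illustration).
import modin.pandas as pd
from modin.experimental.xgboost import ModinDMatrix, train

data = pd.DataFrame({"f0": [0, 1, 2, 3], "f1": [1, 0, 1, 0]})
labels = pd.Series([0, 1, 0, 1])
dtrain = ModinDMatrix(data, labels)

result = train({"objective": "binary:logistic"}, dtrain, evals=[(dtrain, "train")])
booster = result["booster"]   # xgboost.Booster
history = result["history"]   # e.g. {"train": {"logloss": [...]}}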
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    if Engine.get() == "Ray":
        pytest.xfail("Distributed read_sql is broken, see GH#2194")
    filename = "test_from_sql_distributed.db"
    table = "test_from_sql_distributed"
    conn = make_sql_connection(filename, table)
    query = "select * from {0}".format(table)

    pandas_df = pandas.read_sql(query, conn)
    modin_df_from_query = pd.read_sql(
        query, conn, partition_column="col1", lower_bound=0, upper_bound=6
    )
    modin_df_from_table = pd.read_sql(
        table, conn, partition_column="col1", lower_bound=0, upper_bound=6
    )

    df_equals(modin_df_from_query, pandas_df)
    df_equals(modin_df_from_table, pandas_df)
def predict(
    self,
    data: DMatrix,
    num_actors: Optional[int] = None,
    **kwargs,
):
    """
    Run distributed prediction with a trained booster.

    During execution it evenly distributes `data` between workers, runs
    `xgb.predict` on each worker for its subset of `data`, and creates a
    Modin DataFrame with prediction results.

    Parameters
    ----------
    data : modin.experimental.xgboost.DMatrix
        Input data used for prediction.
    num_actors : int, optional
        Number of actors for prediction. If unspecified, this value will be
        computed automatically.
    **kwargs : dict
        Other parameters are the same as `xgboost.Booster.predict`.

    Returns
    -------
    modin.pandas.DataFrame
        Modin DataFrame with prediction results.
    """
    LOGGER.info("Prediction started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _predict
    else:
        raise ValueError("Current version supports only Ray engine.")

    assert isinstance(
        data, DMatrix
    ), f"Type of `data` is {type(data)}, but expected {DMatrix}."
    result = _predict(self.copy(), data, num_actors, **kwargs)
    LOGGER.info("Prediction finished")

    return result
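# End-to-end sketch for Booster.predict() above: train, then run distributed
# prediction, which yields a modin.pandas.DataFrame. Assumes the Ray engine
# and the DMatrix-based API shown in this revision; the data is illustrative.
import modin.pandas as pd
import modin.experimental.xgboost as xgb

X = pd.DataFrame({"f0": [0, 1, 2, 3], "f1": [1, 0, 1, 0]})
y = pd.DataFrame({"label": [0, 1, 0, 1]})

booster = xgb.train({"objective": "binary:logistic"}, xgb.DMatrix(X, y))
preds = booster.predict(xgb.DMatrix(X, y))  # modin.pandas.DataFrame of scores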
def predict(
    model,
    data: ModinDMatrix,
    nthread: Optional[int] = cpu_count(),
    **kwargs,
):
    """
    Run prediction with a trained booster.

    Parameters
    ----------
    model : A Booster or a dictionary returned by `modin.experimental.xgboost.train`
        The trained model.
    data : ModinDMatrix
        Input data used for prediction.
    nthread : int. Default is number of threads on master node
        Number of threads to use on each node.

    Returns
    -------
    modin.pandas.DataFrame
        Modin DataFrame with prediction results.
    """
    LOGGER.info("Prediction started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _predict
    else:
        raise ValueError("Current version supports only Ray engine.")

    if isinstance(model, xgb.Booster):
        booster = model
    elif isinstance(model, dict):
        booster = model["booster"]
    else:
        raise TypeError(
            f"Expected types for `model` are xgb.Booster or dict, but the presented type is {type(model)}"
        )
    result = _predict(booster, data, nthread, **kwargs)
    LOGGER.info("Prediction finished")

    return result
def _update_engine(cls, _):
    factory_name = get_current_backend() + "Factory"
    try:
        cls.__engine = getattr(factories, factory_name)
    except AttributeError:
        if not IsExperimental.get():  # allow missing factories in experimental mode only
            if hasattr(factories, "Experimental" + factory_name):
                msg = (
                    "{0} on {1} is only accessible through the experimental API.\nRun "
                    "`import modin.experimental.pandas as pd` to use {0} on {1}."
                )
            else:
                msg = (
                    "Cannot find a factory for partition '{}' and execution engine '{}'. "
                    "Potential reason might be incorrect environment variable value for "
                    f"{Backend.varname} or {Engine.varname}"
                )
            raise FactoryNotFoundError(msg.format(Backend.get(), Engine.get()))
        cls.__engine = StubFactory.set_failing_name(factory_name)
    else:
        cls.__engine.prepare()
def predict(
    self,
    data: DMatrix,
    num_actors: Optional[int] = None,
    **kwargs,
):
    """
    Run prediction with a trained booster.

    Parameters
    ----------
    data : DMatrix
        Input data used for prediction.
    num_actors : int. Default is None
        Number of actors for prediction. If it's None, this value will be
        computed automatically.
    \\*\\*kwargs :
        Other parameters are the same as `xgboost.Booster.predict`.

    Returns
    -------
    ``modin.pandas.DataFrame``
        Modin DataFrame with prediction results.
    """
    LOGGER.info("Prediction started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _predict
    else:
        raise ValueError("Current version supports only Ray engine.")

    assert isinstance(
        data, DMatrix
    ), f"Type of `data` is {type(data)}, but expected {DMatrix}."
    result = _predict(self.copy(), data, num_actors, **kwargs)
    LOGGER.info("Prediction finished")

    return result
def predict(
    self,
    data: DMatrix,
    **kwargs,
):
    """
    Run distributed prediction with a trained booster.

    During execution it runs ``xgb.predict`` on each worker for subset of
    `data` and creates Modin DataFrame with prediction results.

    Parameters
    ----------
    data : modin.experimental.xgboost.DMatrix
        Input data used for prediction.
    **kwargs : dict
        Other parameters are the same as for ``xgboost.Booster.predict``.

    Returns
    -------
    modin.pandas.DataFrame
        Modin DataFrame with prediction results.
    """
    LOGGER.info("Prediction started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _predict
    else:
        raise ValueError("Current version supports only Ray engine.")

    assert isinstance(
        data, DMatrix
    ), f"Type of `data` is {type(data)}, but expected {DMatrix}."
    result = _predict(self.copy(), data, **kwargs)
    LOGGER.info("Prediction finished")

    return result
def predict(
    self,
    data: DMatrix,
    nthread: Optional[int] = cpu_count(),
    **kwargs,
):
    """
    Run prediction with a trained booster.

    Parameters
    ----------
    data : DMatrix
        Input data used for prediction.
    nthread : int. Default is number of threads on master node
        Number of threads to use on each node.
    \\*\\*kwargs :
        Other parameters are the same as `xgboost.Booster.predict`.

    Returns
    -------
    ``modin.pandas.DataFrame``
        Modin DataFrame with prediction results.
    """
    LOGGER.info("Prediction started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _predict
    else:
        raise ValueError("Current version supports only Ray engine.")

    assert isinstance(
        data, DMatrix
    ), f"Type of `data` is {type(data)}, but expected {DMatrix}."
    result = _predict(self.copy(), data, nthread, **kwargs)
    LOGGER.info("Prediction finished")

    return result
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pytest

from modin.config import Engine
import modin.experimental.xgboost as xgb


@pytest.mark.skipif(
    Engine.get() == "Ray",
    reason="This test doesn't make sense on Ray backend.",
)
@pytest.mark.parametrize("func", ["train", "predict"])
def test_backend(func):
    try:
        getattr(xgb, func)({}, xgb.ModinDMatrix(None, None))
    except ValueError:
        pass
import pandas
import pytest

import modin.experimental.pandas as pd
from modin.config import Engine
from modin.pandas.test.test_io import (  # noqa: F401
    df_equals,
    eval_io,
    make_sql_connection,
    _make_csv_file,
    teardown_test_files,
)
from modin.pandas.test.utils import get_unique_filename


@pytest.mark.skipif(
    Engine.get() == "Dask",
    reason="Dask does not have experimental API",
)
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    if Engine.get() == "Ray":
        pytest.xfail("Distributed read_sql is broken, see GH#2194")
    filename = "test_from_sql_distributed.db"
    table = "test_from_sql_distributed"
    conn = make_sql_connection(filename, table)
    query = "select * from {0}".format(table)

    pandas_df = pandas.read_sql(query, conn)
    modin_df_from_query = pd.read_sql(
        query, conn, partition_column="col1", lower_bound=0, upper_bound=6
    )
    modin_df_from_table = pd.read_sql(
        table, conn, partition_column="col1", lower_bound=0, upper_bound=6
    )

    df_equals(modin_df_from_query, pandas_df)
    df_equals(modin_df_from_table, pandas_df)
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pandas
import pytest

import modin.experimental.pandas as pd
from modin.config import Engine
from modin.pandas.test.test_io import (  # noqa: F401
    df_equals,
    make_sql_connection,
)


@pytest.mark.skipif(
    Engine.get() == "Dask",
    reason="Dask does not have experimental API",
)
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    if Engine.get() == "Ray":
        pytest.xfail("Distributed read_sql is broken, see GH#2194")
    filename = "test_from_sql_distributed.db"
    table = "test_from_sql_distributed"
    conn = make_sql_connection(filename, table)
    query = "select * from {0}".format(table)

    pandas_df = pandas.read_sql(query, conn)
    modin_df_from_query = pd.read_sql(
        query, conn, partition_column="col1", lower_bound=0, upper_bound=6
    )
    modin_df_from_table = pd.read_sql(
        table, conn, partition_column="col1", lower_bound=0, upper_bound=6
    )

    df_equals(modin_df_from_query, pandas_df)
    df_equals(modin_df_from_table, pandas_df)
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.distributed.dataframe.pandas import unwrap_partitions, from_partitions
from modin.config import Engine, NPartitions
from modin.pandas.test.utils import df_equals
from modin.pandas.indexing import compute_sliced_len
from modin.data_management.factories.dispatcher import FactoryDispatcher

PartitionClass = (
    FactoryDispatcher.get_factory().io_cls.frame_cls._partition_mgr_cls._partition_class
)

if Engine.get() == "Ray":
    import ray

    put_func = ray.put
    get_func = ray.get
    FutureType = ray.ObjectRef
elif Engine.get() == "Dask":
    from distributed.client import default_client
    from distributed import Future

    put_func = lambda x: default_client().scatter(x)  # noqa: E731
    get_func = lambda x: x.result()  # noqa: E731
    FutureType = Future
elif Engine.get() == "Python":
    put_func = lambda x: x  # noqa: E731
    get_func = lambda x: x  # noqa: E731
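# How the engine-agnostic helpers above are used (a sketch that assumes the
# definitions in this module): put_func ships a pandas object into the active
# engine's object store, get_func materializes a future back into a concrete
# object, so one test body covers Ray, Dask, and Python alike.
obj_ref = put_func(pandas.DataFrame({"a": [1, 2]}))
assert Engine.get() == "Python" or isinstance(obj_ref, FutureType)
df_back = get_func(obj_ref)  # pandas.DataFrame again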
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.distributed.dataframe.pandas import unwrap_partitions, from_partitions
from modin.config import Engine, NPartitions
from modin.pandas.test.utils import df_equals

if Engine.get() == "Ray":
    import ray
if Engine.get() == "Dask":
    from distributed.client import get_client

NPartitions.put(4)


@pytest.mark.parametrize("axis", [None, 0, 1])
def test_unwrap_partitions(axis):
    data = np.random.randint(0, 100, size=(2**16, 2**8))
    df = pd.DataFrame(data)
    if axis is None:
        expected_partitions = df._query_compiler._modin_frame._partitions
        actual_partitions = np.array(unwrap_partitions(df, axis=axis))
def predict(
    self,
    data: DMatrix,
    **kwargs,
):
    """
    Run distributed prediction with a trained booster.

    During execution it runs ``xgb.predict`` on each worker for subset of
    `data` and creates Modin DataFrame with prediction results.

    Parameters
    ----------
    data : modin.experimental.xgboost.DMatrix
        Input data used for prediction.
    **kwargs : dict
        Other parameters are the same as for ``xgboost.Booster.predict``.

    Returns
    -------
    modin.pandas.DataFrame
        Modin DataFrame with prediction results.
    """
    LOGGER.info("Prediction started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _predict
    else:
        raise ValueError("Current version supports only Ray engine.")

    assert isinstance(
        data, DMatrix
    ), f"Type of `data` is {type(data)}, but expected {DMatrix}."

    if (
        self.feature_names is not None
        and data.feature_names is not None
        and self.feature_names != data.feature_names
    ):
        data_missing = set(self.feature_names) - set(data.feature_names)
        self_missing = set(data.feature_names) - set(self.feature_names)

        msg = "feature_names mismatch: {0} {1}"
        if data_missing:
            msg += (
                "\nexpected "
                + ", ".join(str(s) for s in data_missing)
                + " in input data"
            )
        if self_missing:
            msg += (
                "\ntraining data did not have the following fields: "
                + ", ".join(str(s) for s in self_missing)
            )
        raise ValueError(msg.format(self.feature_names, data.feature_names))

    result = _predict(self.copy(), data, **kwargs)
    LOGGER.info("Prediction finished")

    return result
import pandas
import pytest

import modin.experimental.pandas as pd
from modin.config import Engine
from modin.utils import get_current_execution
from modin.pandas.test.test_io import make_sql_connection  # noqa: F401
from modin.pandas.test.utils import (
    df_equals,
    get_unique_filename,
    teardown_test_files,
    test_data,
)
from modin.test.test_utils import warns_that_defaulting_to_pandas
from modin.pandas.test.utils import parse_dates_values_by_id, time_parsing_csv_path


@pytest.mark.skipif(
    Engine.get() == "Dask",
    reason="Dask does not have experimental API",
)
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    if Engine.get() == "Ray":
        filename = "test_from_sql_distributed.db"
        table = "test_from_sql_distributed"
        conn = make_sql_connection(filename, table)
        query = "select * from {0}".format(table)

        pandas_df = pandas.read_sql(query, conn)
        modin_df_from_query = pd.read_sql(
            query,
            conn,
            partition_column="col1",
            lower_bound=0,
            upper_bound=6,
            max_sessions=2,
        )
        modin_df_from_table = pd.read_sql(
            table,
            conn,
            partition_column="col1",
            lower_bound=0,
            upper_bound=6,
            max_sessions=2,
        )

        df_equals(modin_df_from_query, pandas_df)
        df_equals(modin_df_from_table, pandas_df)
RAND_HIGH = 100
random_state = np.random.RandomState(seed=42)

try:
    from modin.config import NPartitions

    NPARTITIONS = NPartitions.get()
except ImportError:
    NPARTITIONS = pd.DEFAULT_NPARTITIONS

try:
    from modin.config import TestDatasetSize, AsvImplementation, Engine

    ASV_USE_IMPL = AsvImplementation.get()
    ASV_DATASET_SIZE = TestDatasetSize.get() or "Small"
    ASV_USE_ENGINE = Engine.get()
except ImportError:
    # The same benchmarking code can be run for different versions of Modin, so in
    # case of an error importing important variables, we'll just use predefined values
    ASV_USE_IMPL = os.environ.get("MODIN_ASV_USE_IMPL", "modin")
    ASV_DATASET_SIZE = os.environ.get("MODIN_TEST_DATASET_SIZE", "Small")
    ASV_USE_ENGINE = os.environ.get("MODIN_ENGINE", "Ray")

ASV_USE_IMPL = ASV_USE_IMPL.lower()
ASV_DATASET_SIZE = ASV_DATASET_SIZE.lower()
ASV_USE_ENGINE = ASV_USE_ENGINE.lower()

assert ASV_USE_IMPL in ("modin", "pandas")
assert ASV_DATASET_SIZE in ("big", "small")
assert ASV_USE_ENGINE in ("ray", "dask", "python")
# compliance with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pytest

from modin.config import Engine
import modin.experimental.xgboost as xgb
import modin.pandas as pd


@pytest.mark.skipif(
    Engine.get() == "Ray",
    reason="This test doesn't make sense on Ray backend.",
)
@pytest.mark.skipif(
    Engine.get() == "Python",
    reason="This test doesn't make sense on a non-distributed backend (see issue #2938).",
)
def test_backend():
    try:
        xgb.train({}, xgb.DMatrix(pd.DataFrame([0]), pd.DataFrame([0])))
    except ValueError:
        pass