def _update_engine(cls, _):
    """
    Select the factory for the current partition format and execution engine.

    The factory is looked up by name on ``factories``. The name is built from
    ``partition_format.get()`` and ``execution_engine.get()`` and carries an
    ``Experimental`` prefix when the ``MODIN_EXPERIMENTAL`` environment
    variable, title-cased, equals ``"True"``.

    Parameters
    ----------
    _ : object
        Ignored.

    Raises
    ------
    FactoryNotFoundError
        If the factory does not exist and experimental mode is off.
    """
    experimental = os.environ.get("MODIN_EXPERIMENTAL", "").title() == "True"
    name_template = "Experimental{}On{}Factory" if experimental else "{}On{}Factory"
    factory_name = name_template.format(partition_format.get(), execution_engine.get())

    try:
        cls.__engine = getattr(factories, factory_name)
    except AttributeError:
        if experimental:
            # Missing factories are allowed in experimental mode only: a stub
            # carrying the failing name is stored instead of raising.
            cls.__engine = StubFactory.set_failing_name(factory_name)
            return
        if hasattr(factories, "Experimental" + factory_name):
            # The factory exists, but only under its experimental name.
            msg = (
                "{0} on {1} is only accessible through the experimental API.\nRun "
                "`import modin.experimental.pandas as pd` to use {0} on {1}."
            )
        else:
            msg = (
                "Cannot find a factory for partition '{}' and execution engine '{}'. "
                "Potential reason might be incorrect environment variable value for "
                "MODIN_BACKEND or MODIN_ENGINE"
            )
        raise FactoryNotFoundError(
            msg.format(partition_format.get(), execution_engine.get())
        )

    # Reached only when the lookup succeeded.
    cls.__engine.prepare()
def _read_sql(cls, **kwargs):
    """
    Forward ``read_sql`` keyword arguments to ``cls.io_cls.read_sql``.

    When the execution engine is not ``"Ray"``, the options
    ``partition_column``, ``lower_bound``, ``upper_bound`` and ``max_sessions``
    are removed from ``kwargs`` before forwarding. A warning is issued for
    each of them that was passed with a value other than ``None``.

    Parameters
    ----------
    **kwargs : dict
        Keyword arguments for ``read_sql``.

    Returns
    -------
    object
        Whatever ``cls.io_cls.read_sql`` returns.
    """
    if execution_engine.get() != "Ray":
        # Fixed order keeps the warnings in the same sequence as the options
        # are listed here.
        distributed_only = (
            "partition_column",
            "lower_bound",
            "upper_bound",
            "max_sessions",
        )
        for option in distributed_only:
            # `pop` removes the option whether or not it triggers a warning;
            # only an explicit non-None value is worth telling the user about.
            if option in kwargs and kwargs.pop(option) is not None:
                warnings.warn(
                    "Distributed read_sql() was only implemented for Ray engine."
                )
    return cls.io_cls.read_sql(**kwargs)
def __update_engine(self, _):
    """
    Call ``__swap_numpy`` according to the current execution engine.

    For an engine listed in ``REMOTE_ENGINES`` the ``"numpy"`` entry of the
    cloud connection's ``modules`` mapping is passed in; for any other engine
    ``__swap_numpy`` is called with no arguments.

    Parameters
    ----------
    _ : object
        Ignored.
    """
    if execution_engine.get() not in REMOTE_ENGINES:
        self.__swap_numpy()
        return

    # Imported lazily so the cloud module is only loaded for remote engines.
    from modin.experimental.cloud import get_connection

    remote_numpy = get_connection().modules["numpy"]
    self.__swap_numpy(remote_numpy)
def update_class(_):
    """
    Set ``result.__real_cls__`` according to the current execution engine.

    For an engine listed in ``REMOTE_ENGINES`` the attribute named
    ``rpyc_wrapper_name`` is fetched from ``rpyc_proxy`` and called with
    ``result``; otherwise ``result`` is used as its own real class.

    Parameters
    ----------
    _ : object
        Ignored.
    """
    if execution_engine.get() not in REMOTE_ENGINES:
        result.__real_cls__ = result
        return

    # Imported lazily: the proxy module is only needed for remote engines.
    from . import rpyc_proxy

    make_wrapper = getattr(rpyc_proxy, rpyc_wrapper_name)
    result.__real_cls__ = make_wrapper(result)
def update_class(_):
    """
    Set ``result.__real_cls__`` according to the current execution engine.

    For the ``"Cloudray"`` engine the attribute named ``rpyc_wrapper_name`` is
    fetched from ``rpyc_proxy`` and called with ``result``; for any other
    engine ``result`` is used as its own real class.

    Parameters
    ----------
    _ : object
        Ignored.
    """
    use_proxy = execution_engine.get() == "Cloudray"
    if use_proxy:
        # Imported lazily: the proxy module is only needed for this engine.
        from . import rpyc_proxy

        wrapper_factory = getattr(rpyc_proxy, rpyc_wrapper_name)
        real_cls = wrapper_factory(result)
    else:
        real_cls = result
    result.__real_cls__ = real_cls
# NOTE(review): the next three statements look like the tail of a test whose
# `def` line lies outside this view; they are reproduced unchanged.
pandas_df = pandas.read_table(Path(TEST_CSV_FILENAME))
modin_df = pd.read_table(Path(TEST_CSV_FILENAME))
df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("usecols", [["a"], ["a", "b", "e"], [0, 1, 4]])
def test_from_csv_with_usecols(usecols):
    """Check that ``pd.read_csv`` and ``pandas.read_csv`` agree for ``usecols``."""
    fname = "modin/pandas/test/data/test_usecols.csv"
    pandas_df = pandas.read_csv(fname, usecols=usecols)
    modin_df = pd.read_csv(fname, usecols=usecols)
    df_equals(modin_df, pandas_df)


@pytest.mark.skipif(
    execution_engine.get().lower() == "python", reason="Using pandas implementation"
)
def test_from_csv_s3(make_csv_file):
    """Check that ``pd.read_csv`` on an S3 URL does not default to pandas."""
    # Local import: the file's top-level import block is not visible from here.
    import warnings

    dataset_url = "s3://noaa-ghcn-pds/csv/1788.csv"
    pandas_df = pandas.read_csv(dataset_url)
    # This first load is to trigger all the import deprecation warnings
    modin_df = pd.read_csv(dataset_url)

    # This will warn if it defaults to pandas behavior, but it shouldn't.
    # `pytest.warns(None)` is deprecated in pytest 7 and removed in pytest 8;
    # the stdlib recorder with an "always" filter captures every warning
    # without requiring that one is raised.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        modin_df = pd.read_csv(dataset_url)

    assert not any(
        "defaulting to pandas implementation" in str(err) for err in record
    )
# NOTE(review): these three statements appear to close a test function whose
# header is not visible in this view; they are kept exactly as they were.
pandas_df = pandas.read_table(Path(TEST_CSV_FILENAME))
modin_df = pd.read_table(Path(TEST_CSV_FILENAME))
df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("usecols", [["a"], ["a", "b", "e"], [0, 1, 4]])
def test_from_csv_with_usecols(usecols):
    """Compare ``pd.read_csv`` with ``pandas.read_csv`` for each ``usecols``."""
    fname = "modin/pandas/test/data/test_usecols.csv"
    pandas_df = pandas.read_csv(fname, usecols=usecols)
    modin_df = pd.read_csv(fname, usecols=usecols)
    df_equals(modin_df, pandas_df)


@pytest.mark.skipif(
    execution_engine.get().lower() == "python", reason="Using pandas implementation"
)
def test_from_csv_s3(make_csv_file):
    """Verify that reading a CSV from S3 emits no default-to-pandas warning."""
    # Imported here because the module-level imports are outside this view.
    import warnings

    dataset_url = "s3://noaa-ghcn-pds/csv/1788.csv"
    pandas_df = pandas.read_csv(dataset_url)
    # This first load is to trigger all the import deprecation warnings
    modin_df = pd.read_csv(dataset_url)

    # This will warn if it defaults to pandas behavior, but it shouldn't.
    # The stdlib recorder replaces `pytest.warns(None)`, which pytest 7
    # deprecated and pytest 8 removed; "always" ensures nothing is filtered
    # out before it can be inspected.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        modin_df = pd.read_csv(dataset_url)

    assert not any(
        "defaulting to pandas implementation" in str(err) for err in caught
    )