def _update_engine(publisher: Parameter):
    """
    Initialize the execution engine currently selected by `publisher`.

    Parameters
    ----------
    publisher : Parameter
        Config parameter whose ``get()`` returns the engine name.

    Raises
    ------
    ImportError
        If the engine name matches none of the branches below and is not
        listed in ``_NOINIT_ENGINES``.

    Notes
    -----
    After a successful update the engine is recorded in ``_is_first_update``
    so that the one-time initialization is skipped on later calls.
    """
    # NOTE(review): ``dask_client`` is declared global but is never assigned
    # in this function.
    global dask_client
    from modin.config import Backend, CpuCount

    engine = publisher.get()

    if engine == "Ray":
        from modin.engines.ray.utils import initialize_ray

        # With OmniSci backend there is only a single worker per node
        # and we allow it to work on all cores.
        if Backend.get() == "Omnisci":
            CpuCount.put(1)
            os.environ["OMP_NUM_THREADS"] = str(multiprocessing.cpu_count())
        if _is_first_update.get("Ray", True):
            initialize_ray()

    elif engine == "Dask":
        if _is_first_update.get("Dask", True):
            from modin.engines.dask.utils import initialize_dask

            initialize_dask()

    elif engine == "Cloudray":
        from modin.experimental.cloud import get_connection

        conn = get_connection()
        if not _is_first_update.get("Cloudray", True):
            # Already initialized once: only re-apply the backend selection
            # on the remote modin module.
            remote_modules = get_connection().modules
            remote_modin = remote_modules["modin"]
            remote_modin.set_backends("Ray", Backend.get())
        else:

            # NOTE(review): presumably ``conn.teleport`` ships this function
            # to the remote interpreter, so it must stay self-contained
            # (its own imports, no closure variables) -- confirm.
            @conn.teleport
            def init_remote_ray(partition):
                from ray import ray_constants
                import modin
                from modin.engines.ray.utils import initialize_ray

                modin.set_backends("Ray", partition)
                initialize_ray(
                    override_is_cluster=True,
                    override_redis_address=f"localhost:{ray_constants.DEFAULT_PORT}",
                    override_redis_password=ray_constants.REDIS_DEFAULT_PASSWORD,
                )

            init_remote_ray(Backend.get())
            # import FactoryDispatcher here to initialize IO class
            # so it doesn't skew read_csv() timings later on
            import modin.data_management.factories.dispatcher  # noqa: F401

    elif engine == "Cloudpython":
        from modin.experimental.cloud import get_connection

        remote_modules = get_connection().modules
        remote_modules["modin"].set_backends("Python")

    elif engine not in _NOINIT_ENGINES:
        raise ImportError("Unrecognized execution engine: {}.".format(engine))

    _is_first_update[engine] = False
def _update_engine(publisher: Parameter):
    """
    Initialize the selected execution engine and refresh the partition count.

    Parameters
    ----------
    publisher : Parameter
        Config parameter whose ``get()`` returns the engine name.

    Raises
    ------
    ImportError
        If the engine name matches none of the branches below and is not
        listed in ``_NOINIT_ENGINES``.

    Notes
    -----
    Only the "Ray" and "Cloudray" branches assign the module-level
    ``num_cpus``; every other branch leaves it at its previous value.
    ``DEFAULT_NPARTITIONS`` is recomputed from ``num_cpus`` at the end of
    every successful call, with a floor of 4.
    """
    global DEFAULT_NPARTITIONS, dask_client, num_cpus
    from modin.config import Backend, CpuCount

    engine = publisher.get()

    if engine == "Ray":
        import ray
        from modin.engines.ray.utils import initialize_ray

        # With OmniSci backend there is only a single worker per node
        # and we allow it to work on all cores.
        if Backend.get() == "Omnisci":
            CpuCount.put(1)
            os.environ["OMP_NUM_THREADS"] = str(multiprocessing.cpu_count())
        if _is_first_update.get("Ray", True):
            initialize_ray()
        num_cpus = ray.cluster_resources()["CPU"]

    elif engine == "Dask":  # pragma: no cover
        from distributed.client import get_client

        on_main_thread = threading.current_thread().name == "MainThread"
        if on_main_thread and _is_first_update.get("Dask", True):
            import warnings

            warnings.warn("The Dask Engine for Modin is experimental.")

            # Reuse an existing client when there is one; ``get_client``
            # signals its absence with ValueError, in which case a new
            # client is started with one worker per configured CPU.
            try:
                dask_client = get_client()
            except ValueError:
                from distributed import Client

                dask_client = Client(n_workers=CpuCount.get())

    elif engine == "Cloudray":
        from modin.experimental.cloud import get_connection

        conn = get_connection()
        remote_ray = conn.modules["ray"]
        if not _is_first_update.get("Cloudray", True):
            # Already initialized once: only re-apply the backend selection
            # on the remote modin module.
            remote_modules = get_connection().modules
            remote_modin = remote_modules["modin"]
            remote_modin.set_backends("Ray", Backend.get())
        else:

            # NOTE(review): presumably ``conn.teleport`` ships this function
            # to the remote interpreter, so it must stay self-contained
            # (its own imports, no closure variables) -- confirm.
            @conn.teleport
            def init_remote_ray(partition):
                from ray import ray_constants
                import modin
                from modin.engines.ray.utils import initialize_ray

                modin.set_backends("Ray", partition)
                initialize_ray(
                    override_is_cluster=True,
                    override_redis_address=f"localhost:{ray_constants.DEFAULT_PORT}",
                    override_redis_password=ray_constants.REDIS_DEFAULT_PASSWORD,
                )

            init_remote_ray(Backend.get())
            # import EngineDispatcher here to initialize IO class
            # so it doesn't skew read_csv() timings later on
            import modin.data_management.factories.dispatcher  # noqa: F401

        # CPU count is taken from the remote ray module's cluster resources.
        num_cpus = remote_ray.cluster_resources()["CPU"]

    elif engine == "Cloudpython":
        from modin.experimental.cloud import get_connection

        remote_modules = get_connection().modules
        remote_modules["modin"].set_backends("Python")

    elif engine not in _NOINIT_ENGINES:
        raise ImportError("Unrecognized execution engine: {}.".format(engine))

    _is_first_update[engine] = False
    DEFAULT_NPARTITIONS = max(4, int(num_cpus))
def _update_engine(publisher: Parameter):
    """
    Reconcile engine and storage format, then initialize the chosen engine.

    Parameters
    ----------
    publisher : Parameter
        Config parameter whose ``get()`` returns the engine name.

    Raises
    ------
    ValueError
        If the engine is "Native" but the storage format is not "Omnisci".
    AssertionError
        If the engine is "Cloudnative" but the storage format is not
        "Omnisci".
    ImportError
        If the engine name matches none of the branches below and is not
        listed in ``_NOINIT_ENGINES``.

    Notes
    -----
    The first two branches pair "Omnisci" storage with the "Native" engine
    when one of them is still at its default value, and switch on the
    experimental flag. On such a call no engine initialization branch runs,
    because all branches form a single ``if``/``elif`` chain.
    """
    from modin.config import StorageFormat, CpuCount
    from modin.config.envvars import IsExperimental
    from modin.config.pubsub import ValueSource

    if (
        StorageFormat.get() == "Omnisci"
        and publisher.get_value_source() == ValueSource.DEFAULT
    ):
        # Storage format is Omnisci and the engine was left at its default:
        # select the Native engine.
        publisher.put("Native")
        IsExperimental.put(True)
    elif (
        publisher.get() == "Native"
        and StorageFormat.get_value_source() == ValueSource.DEFAULT
    ):
        # Engine is Native and the storage format was left at its default:
        # select the Omnisci storage format.
        StorageFormat.put("Omnisci")
        IsExperimental.put(True)
    elif publisher.get() == "Ray":
        if _is_first_update.get("Ray", True):
            from modin.core.execution.ray.common.utils import initialize_ray

            initialize_ray()
    elif publisher.get() == "Native":
        # With OmniSci storage format there is only a single worker per node
        # and we allow it to work on all cores.
        if StorageFormat.get() == "Omnisci":
            os.environ["OMP_NUM_THREADS"] = str(CpuCount.get())
        else:
            raise ValueError(
                f"Storage format should be 'Omnisci' with 'Native' engine, but provided {StorageFormat.get()}."
            )
    elif publisher.get() == "Dask":
        if _is_first_update.get("Dask", True):
            from modin.core.execution.dask.common.utils import initialize_dask

            initialize_dask()
    elif publisher.get() == "Cloudray":
        from modin.experimental.cloud import get_connection

        conn = get_connection()
        if _is_first_update.get("Cloudray", True):

            # NOTE(review): presumably ``conn.teleport`` ships this function
            # to the remote interpreter, so it must stay self-contained
            # (its own imports, no closure variables) -- confirm.
            @conn.teleport
            def init_remote_ray(partition):
                from ray import ray_constants
                import modin
                from modin.core.execution.ray.common.utils import initialize_ray

                modin.set_execution("Ray", partition)
                initialize_ray(
                    override_is_cluster=True,
                    override_redis_address=f"localhost:{ray_constants.DEFAULT_PORT}",
                    override_redis_password=ray_constants.REDIS_DEFAULT_PASSWORD,
                )

            init_remote_ray(StorageFormat.get())
            # import FactoryDispatcher here to initialize IO class
            # so it doesn't skew read_csv() timings later on
            import modin.core.execution.dispatching.factories.dispatcher  # noqa: F401
        else:
            get_connection().modules["modin"].set_execution(
                "Ray", StorageFormat.get()
            )
    elif publisher.get() == "Cloudpython":
        from modin.experimental.cloud import get_connection

        get_connection().modules["modin"].set_execution("Python")
    elif publisher.get() == "Cloudnative":
        from modin.experimental.cloud import get_connection

        # Raised explicitly rather than via an ``assert`` statement so the
        # check is not stripped when Python runs with -O. The exception type
        # and message are the same as the assert produced.
        if StorageFormat.get() != "Omnisci":
            raise AssertionError(
                f"Storage format should be 'Omnisci' with 'Cloudnative' engine, but provided {StorageFormat.get()}."
            )
        get_connection().modules["modin"].set_execution("Native", "OmniSci")
    elif publisher.get() not in _NOINIT_ENGINES:
        raise ImportError(
            "Unrecognized execution engine: {}.".format(publisher.get())
        )

    # The engine name is re-read here on purpose: the first branch may have
    # replaced it through ``publisher.put``.
    _is_first_update[publisher.get()] = False