def initialize_dask():
    """
    Obtain a Dask client and use it to set the default number of partitions.

    ``get_client()`` is tried first. If it raises ``ValueError``, a
    "not initialized" notice is emitted through ``ErrorMessage`` and a new
    ``Client`` is created with ``CpuCount.get()`` workers. The number of
    entries reported by ``client.ncores()`` is then passed to
    ``NPartitions.put_if_default``.
    """
    from distributed.client import get_client

    try:
        dask_client = get_client()
    except ValueError:
        from distributed import Client

        # The snippet below is indented on purpose: it should show up as an
        # indented code sample inside the emitted message.
        ErrorMessage.not_initialized(
            "Dask",
            """
    from distributed import Client

    client = Client()
""",
        )
        dask_client = Client(n_workers=CpuCount.get())

    NPartitions.put_if_default(len(dask_client.ncores()))
def initialize_ray(
    override_is_cluster=False,
    override_redis_address: str = None,
    override_redis_password: str = None,
):
    """
    Start Ray when needed and set the default number of partitions from it.

    Initialization is driven by the arguments, Modin configuration values and
    internal defaults.

    Parameters
    ----------
    override_is_cluster: bool, optional
        Treat this process as running on a Ray cluster head node regardless of
        what Modin detects. A truthy value also forces the initialization
        branch to run even if ``ray.is_initialized()`` is already true.
        If not specified, ``IsRayCluster`` ($MODIN_RAY_CLUSTER) is used.
    override_redis_address: str, optional
        Redis address to connect to when running in a Ray cluster.
        If not specified, ``RayRedisAddress`` ($MODIN_REDIS_ADDRESS) is used.
    override_redis_password: str, optional
        Password to use when connecting to Redis.
        If not specified, a new random one is generated.
    """
    import ray

    if not ray.is_initialized() or override_is_cluster:
        import secrets

        gigabyte = 10**9

        run_as_cluster = override_is_cluster or IsRayCluster.get()
        address = override_redis_address or RayRedisAddress.get()
        password = override_redis_password or secrets.token_hex(32)

        if run_as_cluster:
            # In a cluster setting Ray is only started for the head node.
            head_node_options = {
                "address": address or "auto",
                "include_dashboard": False,
                "ignore_reinit_error": True,
                "_redis_password": password,
                "logging_level": 100,
            }
            ray.init(**head_node_options)
        else:
            from modin.error_message import ErrorMessage

            # The snippet is laid out this way on purpose so that it appears
            # indented in the warning message.
            ErrorMessage.not_initialized(
                "Ray",
                """
    import ray
    ray.init()
""",
            )

            store_bytes = Memory.get()
            plasma_dir = RayPlasmaDir.get()

            if IsOutOfCore.get():
                if plasma_dir is None:
                    from tempfile import gettempdir

                    plasma_dir = gettempdir()
                # A value that was already configured must not be overwritten.
                if store_bytes is None:
                    # System memory rounded down to a whole number of
                    # gigabytes; out-of-core mode defaults to 8x that amount.
                    whole_gigabytes = ray.utils.get_system_memory() // gigabyte
                    store_bytes = 8 * (whole_gigabytes * gigabyte)

            if store_bytes is not None:
                store_bytes = int(store_bytes)
            else:
                # Nothing was configured above: take 60% of system memory,
                # rounded down to a whole gigabyte. When that rounds down to
                # zero the value is reset to None (the original note describes
                # this as "just use the default in ray").
                store_bytes = (
                    int(0.6 * ray.utils.get_system_memory() // gigabyte * gigabyte)
                    or None
                )

            local_options = {
                "num_cpus": CpuCount.get(),
                "include_dashboard": False,
                "ignore_reinit_error": True,
                "_plasma_directory": plasma_dir,
                "object_store_memory": store_bytes,
                "address": address,
                "_redis_password": password,
                "logging_level": 100,
                "_memory": store_bytes,
                "_lru_evict": True,
            }
            ray.init(**local_options)

        # Apply the path fix in this process first, then register it and the
        # pandas import to be run on all workers.
        _move_stdlib_ahead_of_site_packages()
        run_on_all_workers = ray.worker.global_worker.run_function_on_all_workers
        run_on_all_workers(_move_stdlib_ahead_of_site_packages)
        run_on_all_workers(_import_pandas)

    NPartitions.put_if_default(int(ray.cluster_resources()["CPU"]))