def init_ray_context_fixture():
    """Start a local Spark context with Ray on top, yield, then tear both down.

    Written as a generator: everything before the bare ``yield`` is setup,
    everything after it is teardown (Ray is stopped before Spark).
    """
    # Imported lazily so the heavy dependencies are only loaded when the
    # fixture is actually used.
    from bigdl.dllib.nncontext import init_spark_on_local
    from bigdl.orca.ray import RayContext

    spark_context = init_spark_on_local(cores=4, spark_log_level="INFO")
    ray_context = RayContext(sc=spark_context, object_store_memory="1g")
    ray_context.init()

    yield

    # Teardown: stop Ray first, then the SparkContext it was built on.
    ray_context.stop()
    spark_context.stop()
def test_local(self):
    """Start and stop Ray twice on one local SparkContext, then check cleanup.

    After the second stop, every pid recorded by the last RayContext's
    process monitor must no longer exist.
    """
    node_num = 4
    sc = init_spark_on_local(cores=node_num)

    def run_ray_cycle():
        # One full init -> use -> stop cycle on the shared SparkContext.
        context = RayContext(sc=sc, object_store_memory="1g")
        context.init()
        workers = [TestRay.remote() for _ in range(node_num)]
        hostnames = ray.get([worker.hostname.remote() for worker in workers])
        print(hostnames)
        context.stop()
        return context

    run_ray_cycle()
    time.sleep(3)

    # repeat
    print("-------------------first repeat begin!------------------")
    ray_ctx = run_ray_cycle()
    sc.stop()
    time.sleep(3)

    # Only the pids tracked by the most recent context are verified.
    tracked_pids = (
        pid
        for process_info in ray_ctx.ray_processesMonitor.process_infos
        for pid in process_info.pids
    )
    for pid in tracked_pids:
        assert not psutil.pid_exists(pid)
def test_local(self):
    """Launch Ray on a local 8-core SparkContext with 4 CPU cores per Ray node.

    Checks that the address info returned by ``RayContext.init`` contains
    ``object_store_address`` and that four actors can be created and queried.
    """

    @ray.remote
    class TestRay:
        def hostname(self):
            """Return the hostname of the machine running this actor."""
            import socket
            return socket.gethostname()

    actor_count = 4
    sc = init_spark_on_local(cores=8)
    ray_ctx = RayContext(
        sc=sc,
        object_store_memory="1g",
        ray_node_cpu_cores=4,
    )

    address_info = ray_ctx.init()
    assert "object_store_address" in address_info

    workers = [TestRay.remote() for _ in range(actor_count)]
    hostnames = ray.get([worker.hostname.remote() for worker in workers])
    print(hostnames)

    ray_ctx.stop()
    sc.stop()
num_executors=slave_num, executor_cores=28, executor_memory="10g", driver_memory="2g", driver_cores=4, extra_executor_memory_for_ray="30g", conf={"hello": "world"}) ray_ctx = RayContext(sc=sc, object_store_memory="25g", extra_params={"temp-dir": "/tmp/hello/"}, env={ "http_proxy": "http://child-prc.intel.com:913", "http_proxys": "http://child-prc.intel.com:913" }) ray_ctx.init() @ray.remote class TestRay(): def hostname(self): import socket return socket.gethostname() def check_cv2(self): # conda install -c conda-forge opencv==3.4.2 import cv2 return cv2.__version__ def ip(self): return ray._private.services.get_node_ip_address()
def init_orca_context(cluster_mode=None, cores=2, memory="2g", num_nodes=1,
                      init_ray_on_spark=False, **kwargs):
    """
    Creates or gets a SparkContext for different Spark cluster modes (and launch Ray services
    across the cluster if necessary).

    If a SparkContext is already active, it is reused: cluster_mode is then determined by the
    existing SparkContext and a warning is issued when a cluster_mode other than "spark-submit"
    was passed.

    :param cluster_mode: The mode for the Spark cluster. One of "local", "yarn-client",
           "yarn-cluster", "k8s-client", "standalone" and "spark-submit" (case-insensitive).
           Default to be None; in this case an existing SparkContext is used if there is one,
           otherwise "local" is used.
           For "yarn-client" and "yarn-cluster", you are supposed to use conda environment
           and set the environment variable HADOOP_CONF_DIR (or pass the argument hadoop_conf).
           For "k8s-client", you are supposed to additionally specify the arguments master
           and container_image.
           For "k8s-cluster", you are supposed to use spark-submit to submit the application
           and use the default cluster_mode instead.
           In this case, please set the Spark configurations through command line options or
           the properties file.
           To make things easier, you are recommended to use the launch scripts we provide:
           https://github.com/intel-analytics/BigDL/tree/branch-2.0/scripts.
           For other cluster modes, you are recommended to install and run bigdl through
           pip, which is more convenient.
    :param cores: The number of cores to be used on each node. Default to be 2.
    :param memory: The memory allocated for each node. Default to be '2g'.
    :param num_nodes: The number of nodes to be used in the cluster. Default to be 1.
           For Spark local, num_nodes should always be 1 and you don't need to change it.
    :param init_ray_on_spark: Whether to launch Ray services across the cluster.
           Default to be False and in this case the Ray cluster would be launched lazily when
           Ray is involved in Project Orca.
    :param kwargs: The extra keyword arguments used for creating SparkContext and
           launching Ray if any. Only the keys recognised for the selected cluster_mode
           are forwarded.

    :return: An instance of SparkContext.
    :raises ValueError: If cluster_mode is "k8s-cluster" or is not a supported mode.
    :raises AssertionError: If hadoop conf cannot be located for yarn modes, or master /
            container_image is missing for k8s-client mode.
    """
    print("Initializing orca context")
    import atexit
    atexit.register(stop_orca_context)
    from pyspark import SparkContext
    import warnings

    def _copy_kwargs(keys, target):
        # Forward only the recognised keys that the caller actually supplied.
        for key in keys:
            if key in kwargs:
                target[key] = kwargs[key]

    spark_args = {}
    _copy_kwargs(["conf", "spark_log_level", "redirect_spark_log"], spark_args)
    if cluster_mode is not None:
        cluster_mode = cluster_mode.lower()
    activate_sc = SparkContext._active_spark_context is not None
    if activate_sc:
        if cluster_mode is not None and cluster_mode != "spark-submit":
            warnings.warn("Use an existing SparkContext, " +
                          "cluster_mode is determined by the existing SparkContext", Warning)
        from bigdl.dllib.nncontext import init_nncontext
        sc = init_nncontext(conf=None, spark_log_level="WARN", redirect_spark_log=True)
    else:
        cluster_mode = "local" if cluster_mode is None else cluster_mode
        if cluster_mode == "local":
            if num_nodes > 1:
                warnings.warn("For Spark local mode, num_nodes should be 1, but got "
                              + repr(num_nodes) + ", ignored", Warning)
            # In local mode the `memory` argument is applied through this env variable.
            os.environ["SPARK_DRIVER_MEMORY"] = memory
            _copy_kwargs(["python_location"], spark_args)
            from bigdl.dllib.nncontext import init_spark_on_local
            sc = init_spark_on_local(cores, **spark_args)
        elif cluster_mode == "spark-submit":
            from bigdl.dllib.nncontext import init_nncontext
            sc = init_nncontext(**spark_args)
        elif cluster_mode.startswith("yarn"):  # yarn, yarn-client or yarn-cluster
            # The environment variable takes precedence over the hadoop_conf argument.
            hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
            if not hadoop_conf:
                assert "hadoop_conf" in kwargs,\
                    "Directory path to hadoop conf not found for yarn-client mode. Please either " \
                    "specify argument hadoop_conf or set the environment variable HADOOP_CONF_DIR"
                hadoop_conf = kwargs["hadoop_conf"]
            from bigdl.dllib.utils.utils import detect_conda_env_name
            conda_env_name = detect_conda_env_name()
            _copy_kwargs(["driver_cores", "driver_memory", "extra_executor_memory_for_ray",
                          "extra_python_lib", "penv_archive", "additional_archive",
                          "hadoop_user_name", "spark_yarn_archive", "jars"], spark_args)
            from bigdl.dllib.nncontext import init_spark_on_yarn, init_spark_on_yarn_cluster
            if cluster_mode == "yarn-cluster":
                sc = init_spark_on_yarn_cluster(hadoop_conf=hadoop_conf,
                                                conda_name=conda_env_name,
                                                num_executors=num_nodes,
                                                executor_cores=cores,
                                                executor_memory=memory,
                                                **spark_args)
            else:
                sc = init_spark_on_yarn(hadoop_conf=hadoop_conf,
                                        conda_name=conda_env_name,
                                        num_executors=num_nodes,
                                        executor_cores=cores,
                                        executor_memory=memory,
                                        **spark_args)
        elif cluster_mode.startswith("k8s"):  # k8s or k8s-client
            if cluster_mode == "k8s-cluster":
                raise ValueError('For k8s-cluster mode, '
                                 'please submit the application via spark-submit '
                                 'and use the default cluster_mode instead')
            assert "master" in kwargs, "Please specify master for k8s-client mode"
            assert "container_image" in kwargs, "Please specify container_image for k8s-client mode"
            _copy_kwargs(["driver_cores", "driver_memory", "extra_executor_memory_for_ray",
                          "extra_python_lib", "jars", "python_location"], spark_args)
            from bigdl.dllib.nncontext import init_spark_on_k8s
            sc = init_spark_on_k8s(master=kwargs["master"],
                                   container_image=kwargs["container_image"],
                                   num_executors=num_nodes,
                                   executor_cores=cores,
                                   executor_memory=memory,
                                   **spark_args)
        elif cluster_mode == "standalone":
            _copy_kwargs(["driver_cores", "driver_memory", "extra_executor_memory_for_ray",
                          "extra_python_lib", "jars", "master", "python_location",
                          "enable_numa_binding"], spark_args)
            from bigdl.dllib.nncontext import init_spark_standalone
            sc = init_spark_standalone(num_executors=num_nodes,
                                       executor_cores=cores,
                                       executor_memory=memory,
                                       **spark_args)
        else:
            # Use a `{}` placeholder: `%s` is never substituted by str.format.
            raise ValueError("cluster_mode can only be local, yarn-client, yarn-cluster, "
                             "k8s-client or standalone, "
                             "but got: {}".format(cluster_mode))

    ray_args = {}
    _copy_kwargs(["redis_port", "password", "object_store_memory", "verbose", "env",
                  "extra_params", "num_ray_nodes", "ray_node_cpu_cores", "include_webui"],
                 ray_args)
    from bigdl.orca.ray import RayContext
    # The RayContext is always constructed; Ray itself is only started eagerly on request.
    ray_ctx = RayContext(sc, **ray_args)
    if init_ray_on_spark:
        driver_cores = 0  # This is the default value.
        ray_ctx.init(driver_cores=driver_cores)
    return sc