def init_spark_on_k8s(self, master, container_image, num_executors, executor_cores,
                      executor_memory="2g", driver_memory="1g", driver_cores=4,
                      extra_executor_memory_for_ray=None, extra_python_lib=None,
                      conf=None, jars=None, python_location=None):
    """
    Create a SparkContext for a Kubernetes master in client deploy mode.

    On top of the submit arguments and the enriched Spark conf, the executors are
    configured with PYTHONHOME, an extra library path and an LD_PRELOAD entry derived
    from the detected Python location, and the zoo/BigDL class path is prepended to
    spark.executor.extraClassPath.

    :param master: The master address placed after "--master" in the submit arguments.
    :param container_image: Value for spark.kubernetes.container.image.
    :param num_executors: The number of executors.
    :param executor_cores: The number of cores for each executor.
    :param executor_memory: The memory for each executor. Default to be "2g".
    :param driver_memory: The memory for the driver. Default to be "1g".
    :param driver_cores: The number of cores for the driver. Default to be 4.
    :param extra_executor_memory_for_ray: Forwarded to enrich_conf_for_spark.
    :param extra_python_lib: Forwarded to gen_submit_args.
    :param conf: User supplied Spark configurations, forwarded to enrich_conf_for_spark.
    :param jars: Forwarded to gen_submit_args.
    :param python_location: Value for PYSPARK_PYTHON when that variable is not already
           set; falls back to detect_python_location().
    :return: Whatever self.create_sc returns for the assembled arguments and conf.
    """
    print("Initializing SparkContext for k8s-client mode")

    # Environment root: the detected interpreter path without its last two components.
    interpreter_parts = detect_python_location().split("/")
    python_env = "/".join(interpreter_parts[:-2])

    if "PYSPARK_PYTHON" not in os.environ:
        os.environ["PYSPARK_PYTHON"] = python_location or detect_python_location()

    submit_prefix = "--master " + master + " --deploy-mode client"
    submit_args = submit_prefix + gen_submit_args(
        driver_cores, driver_memory, num_executors, executor_cores,
        executor_memory, extra_python_lib, jars)

    conf = enrich_conf_for_spark(conf, driver_cores, driver_memory, num_executors,
                                 executor_cores, executor_memory,
                                 extra_executor_memory_for_ray)

    # Only "major.minor" of the running interpreter is used in the library paths.
    major_minor = ".".join(platform.python_version().split(".")[:2])
    preload_so = "{}/lib/libpython{}m.so".format(python_env, major_minor)
    ld_path = "{0}/lib:{0}/lib/python{1}/lib-dynload".format(python_env, major_minor)

    lib_path_key = "spark.executor.extraLibraryPath"
    if lib_path_key in conf:
        # Keep a user supplied library path, placed after ours.
        ld_path = "{}:{}".format(ld_path, conf[lib_path_key])

    conf["spark.cores.max"] = num_executors * executor_cores
    conf["spark.executorEnv.PYTHONHOME"] = python_env
    conf[lib_path_key] = ld_path
    conf["spark.executorEnv.LD_PRELOAD"] = preload_so
    conf["spark.kubernetes.container.image"] = container_image

    # Not targeted to use pip install. BIGDL_CLASSPATH is supposed to set.
    jar_path = os.environ.get("BIGDL_CLASSPATH")
    if jar_path is None:
        jar_path = ":".join(get_zoo_bigdl_classpath_on_driver())

    class_path_key = "spark.executor.extraClassPath"
    if class_path_key in conf:
        jar_path = "{}:{}".format(jar_path, conf[class_path_key])
    conf[class_path_key] = jar_path

    return self.create_sc(submit_args, conf)
def _get_conda_python_path(self):
    """
    Return the only path matching ``<env>/lib/python*``.

    ``<env>`` is the detected Python location with its last two path components
    removed (presumably ``bin/python`` of a conda environment).

    :return: The single matching path.
    :raises AssertionError: If the glob does not match exactly one path.
    """
    location_parts = detect_python_location().split("/")
    env_root = "/".join(location_parts[:-2])
    candidates = glob.glob(env_root + "/lib/python*")
    assert len(candidates) == 1, \
        "Conda env should contain a single python, but got: {}".format(candidates)
    return candidates[0]
def init_spark_on_local(self, cores, conf=None, python_location=None):
    """
    Get or create a SparkContext whose master is ``local[cores]``.

    :param cores: Inserted into the master string as ``local[<cores>]``.
    :param conf: User supplied Spark configurations, forwarded to init_spark_conf.
    :param python_location: Value for PYSPARK_PYTHON when that variable is not already
           set; falls back to detect_python_location().
    :return: The object returned by init_nncontext.
    """
    print("Start to getOrCreate SparkContext")
    if "PYSPARK_PYTHON" not in os.environ:
        os.environ["PYSPARK_PYTHON"] = python_location or detect_python_location()

    local_master = "local[{}]".format(cores)
    local_conf = init_spark_conf(conf).setMaster(local_master)
    spark_context = init_nncontext(conf=local_conf,
                                   spark_log_level=self.spark_log_level,
                                   redirect_spark_log=self.redirect_spark_log)
    print("Successfully got a SparkContext")
    return spark_context
def init_spark_on_k8s(self, master, container_image, num_executors, executor_cores,
                      executor_memory="2g", driver_memory="1g", driver_cores=4,
                      extra_executor_memory_for_ray=None, extra_python_lib=None,
                      conf=None, jars=None, python_location=None):
    """
    Create a SparkContext for a Kubernetes master in client deploy mode.

    :param master: The master address placed after "--master" in the submit arguments.
    :param container_image: Value for spark.kubernetes.container.image.
    :param num_executors: The number of executors.
    :param executor_cores: The number of cores for each executor.
    :param executor_memory: The memory for each executor. Default to be "2g".
    :param driver_memory: The memory for the driver. Default to be "1g".
    :param driver_cores: The number of cores for the driver. Default to be 4.
    :param extra_executor_memory_for_ray: Forwarded to enrich_conf_for_spark.
    :param extra_python_lib: Forwarded to gen_submit_args.
    :param conf: User supplied Spark configurations, forwarded to enrich_conf_for_spark.
    :param jars: Forwarded to gen_submit_args.
    :param python_location: Value for PYSPARK_PYTHON when that variable is not already
           set; falls back to detect_python_location().
    :return: Whatever self.create_sc returns for the assembled arguments and conf.
    """
    print("Initializing SparkContext for k8s-client mode")
    if "PYSPARK_PYTHON" not in os.environ:
        os.environ["PYSPARK_PYTHON"] = python_location or detect_python_location()

    submit_prefix = "--master " + master + " --deploy-mode client"
    submit_args = submit_prefix + gen_submit_args(
        driver_cores, driver_memory, num_executors, executor_cores,
        executor_memory, extra_python_lib, jars)

    conf = enrich_conf_for_spark(conf, driver_cores, driver_memory, num_executors,
                                 executor_cores, executor_memory,
                                 extra_executor_memory_for_ray)
    conf["spark.cores.max"] = num_executors * executor_cores
    conf["spark.kubernetes.container.image"] = container_image

    # Not targeted to use pip install. BIGDL_CLASSPATH is supposed to set.
    jar_path = os.environ.get("BIGDL_CLASSPATH")
    if jar_path is None:
        jar_path = ":".join(get_zoo_bigdl_classpath_on_driver())

    # The zoo/BigDL jars go in front of any user supplied executor class path.
    class_path_key = "spark.executor.extraClassPath"
    if class_path_key in conf:
        jar_path = "{}:{}".format(jar_path, conf[class_path_key])
    conf[class_path_key] = jar_path

    return self.create_sc(submit_args, conf)
def init_spark_standalone(self, num_executors, executor_cores, executor_memory="2g",
                          driver_cores=4, driver_memory="1g", master=None,
                          extra_executor_memory_for_ray=None, extra_python_lib=None,
                          conf=None, jars=None, python_location=None,
                          enable_numa_binding=False):
    """
    Create a SparkContext on a Spark standalone cluster.

    When no master is given, a master and a worker are started on the current node
    through the scripts under the bundled ``share/bin/standalone/sbin`` directory;
    otherwise the given, already running cluster is used.

    :param num_executors: The number of executors.
    :param executor_cores: The number of cores for each executor.
    :param executor_memory: The memory for each executor. Default to be "2g".
    :param driver_cores: The number of cores for the driver. Default to be 4.
    :param driver_memory: The memory for the driver. Default to be "1g".
    :param master: The address (``spark://master:port``) of an existing standalone
           cluster. Default is None, in which case a cluster is started locally.
    :param extra_executor_memory_for_ray: Forwarded to enrich_conf_for_spark.
    :param extra_python_lib: Forwarded to gen_submit_args.
    :param conf: User supplied Spark configurations, forwarded to enrich_conf_for_spark.
    :param jars: Forwarded to gen_submit_args.
    :param python_location: Value for PYSPARK_PYTHON when that variable is not already
           set; falls back to detect_python_location().
    :param enable_numa_binding: If True, workers are started with
           start-worker-with-numactl.sh and SPARK_WORKER_INSTANCES is set to
           num_executors. Default is False.
    :return: Whatever self.create_sc returns for the assembled arguments and conf.
    :raises RuntimeError: If the master or worker start script exits with a non-zero
            status.
    :raises AssertionError: If a given master does not start with "spark://".
    """
    import subprocess
    import pyspark
    from zoo.util.utils import get_node_ip

    if "PYSPARK_PYTHON" not in os.environ:
        os.environ["PYSPARK_PYTHON"] = \
            python_location if python_location else detect_python_location()

    if not master:
        pyspark_home = os.path.abspath(pyspark.__file__ + "/../")
        zoo_standalone_home = os.path.abspath(
            __file__ + "/../../share/bin/standalone")
        node_ip = get_node_ip()
        SparkRunner.standalone_env = {
            "SPARK_HOME": pyspark_home,
            "ZOO_STANDALONE_HOME": zoo_standalone_home,
            # If not set this, by default master is hostname but not ip,
            "SPARK_MASTER_HOST": node_ip
        }
        if "JAVA_HOME" in os.environ:
            SparkRunner.standalone_env["JAVA_HOME"] = os.environ["JAVA_HOME"]

        # The scripts installed from pip don't have execution permission
        # and need to first give them permission.
        # subprocess.call waits for the child and reaps it through Popen itself,
        # unlike a raw os.waitpid on the Popen pid.
        subprocess.call(["chmod", "-R", "+x", "{}/sbin".format(zoo_standalone_home)])

        # Start master
        status = subprocess.call(
            "{}/sbin/start-master.sh".format(zoo_standalone_home),
            shell=True, env=SparkRunner.standalone_env)
        if status != 0:
            raise RuntimeError("starting master failed")
        master = "spark://{}:7077".format(node_ip)  # 7077 is the default port

        # Start worker
        if enable_numa_binding:
            worker_script = "start-worker-with-numactl.sh"
            SparkRunner.standalone_env["SPARK_WORKER_INSTANCES"] = str(num_executors)
        else:
            worker_script = "start-worker.sh"
        status = subprocess.call(
            "{}/sbin/{} {}".format(zoo_standalone_home, worker_script, master),
            shell=True, env=SparkRunner.standalone_env)
        if status != 0:
            raise RuntimeError("starting worker failed")
    else:
        # A Spark standalone cluster has already been started by the user.
        assert master.startswith("spark://"), \
            "Please input a valid master address for your Spark standalone cluster: " \
            "spark://master:port"

    # Start pyspark-shell
    submit_args = "--master " + master
    submit_args = submit_args + gen_submit_args(
        driver_cores, driver_memory, num_executors, executor_cores,
        executor_memory, extra_python_lib, jars)

    conf = enrich_conf_for_spark(conf, driver_cores, driver_memory, num_executors,
                                 executor_cores, executor_memory,
                                 extra_executor_memory_for_ray)
    conf.update({
        "spark.cores.max": num_executors * executor_cores,
        # Detected interpreter path without its last two components.
        "spark.executorEnv.PYTHONHOME": "/".join(detect_python_location().split("/")[:-2])
    })

    # The zoo/BigDL jars go in front of any user supplied executor class path.
    zoo_bigdl_jar_path = ":".join(get_zoo_bigdl_classpath_on_driver())
    if "spark.executor.extraClassPath" in conf:
        conf["spark.executor.extraClassPath"] = "{}:{}".format(
            zoo_bigdl_jar_path, conf["spark.executor.extraClassPath"])
    else:
        conf["spark.executor.extraClassPath"] = zoo_bigdl_jar_path

    sc = self.create_sc(submit_args, conf)
    return sc
def __init__(self, sc, redis_port=None, password="******", object_store_memory=None,
             verbose=False, env=None, extra_params=None, include_webui=True,
             num_ray_nodes=None, ray_node_cpu_cores=None):
    """
    The RayContext would initiate a ray cluster on top of the configuration of
    SparkContext.
    After creating RayContext, call the init method to set up the cluster.

    - For Spark local mode: The total available cores for Ray is equal to the number of
      Spark local cores.
    - For Spark cluster mode: The number of raylets to be created is equal to the number
      of Spark executors. The number of cores allocated for each raylet is equal to the
      number of cores for each Spark executor.
    You are allowed to specify num_ray_nodes and ray_node_cpu_cores for configurations
    to start raylets.

    :param sc: An instance of SparkContext.
    :param redis_port: The redis port for the ray head node. Default is None.
           The value would be randomly picked (between 10000 and 65535) if not specified.
    :param password: The password for redis.
    :param object_store_memory: The memory size for ray object_store in string.
           This can be specified in bytes(b), kilobytes(k), megabytes(m) or gigabytes(g).
           For example, "50b", "100k", "250m", "30g".
    :param verbose: True for more logs when starting ray. Default is False.
    :param env: The environment variable dict for running ray processes. Default is None.
    :param extra_params: The key value dict for extra options to launch ray.
           For example, extra_params={"temp-dir": "/tmp/ray/"}
    :param include_webui: True for including web ui when starting ray. Default is True.
    :param num_ray_nodes: The number of raylets to start across the cluster.
           For Spark local mode, you don't need to specify this value.
           For Spark cluster mode, it is default to be the number of Spark executors. If
           neither spark.executor.instances nor spark.cores.max can be detected in your
           SparkContext, you need to explicitly specify this.
           It is recommended that num_ray_nodes is not larger than the number of Spark
           executors to make sure there are enough resources in your cluster.
    :param ray_node_cpu_cores: The number of available cores for each raylet.
           For Spark local mode, it is default to be the number of Spark local cores.
           For Spark cluster mode, it is default to be the number of cores for each Spark
           executor. If spark.executor.cores can't be detected in your SparkContext, you
           need to explicitly specify this.
           It is recommended that ray_node_cpu_cores is not larger than the number of
           cores for each Spark executor to make sure there are enough resources in your
           cluster.
    :raises Exception: In Spark cluster mode, if the cores per raylet or the number of
            raylets can neither be detected from the SparkContext nor were specified.
    """
    assert sc is not None, "sc cannot be None, please create a SparkContext first"
    self.sc = sc
    self.initialized = False
    self.is_local = is_local(sc)
    self.verbose = verbose
    self.redis_password = password
    self.object_store_memory = resource_to_bytes(object_store_memory)
    self.ray_processesMonitor = None
    self.env = env
    self.extra_params = extra_params
    self.include_webui = include_webui
    self._address_info = None

    if self.is_local:
        # For Spark local mode, directly call ray.init() and ray.shutdown().
        # ray.shutdown() would clear up all the ray related processes.
        # Ray Manager is only needed for Spark cluster mode to monitor ray processes.
        self.num_ray_nodes = 1
        spark_cores = self._get_spark_local_cores()
        if ray_node_cpu_cores:
            ray_node_cpu_cores = int(ray_node_cpu_cores)
            if ray_node_cpu_cores > spark_cores:
                warnings.warn(
                    "ray_node_cpu_cores is larger than available Spark cores, "
                    "make sure there are enough resources on your machine")
            self.ray_node_cpu_cores = ray_node_cpu_cores
        else:
            self.ray_node_cpu_cores = spark_cores
    else:
        spark_conf = self.sc.getConf()

        # Cores per raylet: explicit argument first, then spark.executor.cores.
        if spark_conf.contains("spark.executor.cores"):
            executor_cores = int(spark_conf.get("spark.executor.cores"))
        else:
            executor_cores = None
        if ray_node_cpu_cores:
            ray_node_cpu_cores = int(ray_node_cpu_cores)
            if executor_cores and ray_node_cpu_cores > executor_cores:
                warnings.warn(
                    "ray_node_cpu_cores is larger than Spark executor cores, "
                    "make sure there are enough resources on your cluster")
            self.ray_node_cpu_cores = ray_node_cpu_cores
        elif executor_cores:
            self.ray_node_cpu_cores = executor_cores
        else:
            raise Exception(
                "spark.executor.cores not detected in the SparkContext, "
                "you need to manually specify num_ray_nodes and ray_node_cpu_cores "
                "for RayContext to start ray services")

        # Number of raylets: explicit argument first, then spark.executor.instances,
        # then spark.cores.max divided by the cores per raylet (rounded down).
        if spark_conf.contains("spark.executor.instances"):
            num_executors = int(spark_conf.get("spark.executor.instances"))
        elif spark_conf.contains("spark.cores.max"):
            num_executors = \
                int(spark_conf.get("spark.cores.max")) // self.ray_node_cpu_cores
        else:
            num_executors = None
        if num_ray_nodes:
            num_ray_nodes = int(num_ray_nodes)
            if num_executors and num_ray_nodes > num_executors:
                warnings.warn(
                    "num_ray_nodes is larger than the number of Spark executors, "
                    "make sure there are enough resources on your cluster")
            self.num_ray_nodes = num_ray_nodes
        elif num_executors:
            self.num_ray_nodes = num_executors
        else:
            # The settings missing here are the executor count ones, not
            # spark.executor.cores (that case has already been handled above).
            raise Exception(
                "spark.executor.instances or spark.cores.max not detected in the "
                "SparkContext, you need to manually specify num_ray_nodes "
                "for RayContext to start ray services")

        from zoo.util.utils import detect_python_location
        self.python_loc = os.environ.get("PYSPARK_PYTHON", detect_python_location())
        self.redis_port = random.randint(10000, 65535) if not redis_port \
            else int(redis_port)
        self.ray_service = RayServiceFuncGenerator(
            python_loc=self.python_loc,
            redis_port=self.redis_port,
            ray_node_cpu_cores=self.ray_node_cpu_cores,
            password=self.redis_password,
            object_store_memory=self.object_store_memory,
            verbose=self.verbose,
            env=self.env,
            include_webui=self.include_webui,
            extra_params=self.extra_params)

    RayContext._active_ray_context = self
    self.total_cores = self.num_ray_nodes * self.ray_node_cpu_cores
def init_orca_context(cluster_mode="local", cores=2, memory="2g", num_nodes=1,
                      init_ray_on_spark=False, **kwargs):
    """
    Creates or gets a SparkContext for different Spark cluster modes (and launch Ray
    services across the cluster if necessary).

    :param cluster_mode: The mode for the Spark cluster. One of "local", "yarn-client",
           "standalone" and "spark-submit". Default to be "local". The value is matched
           case-insensitively.

           For "spark-submit", you are supposed to use spark-submit to submit the
           application. In this case, please set the Spark configurations through command
           line options or the properties file. You need to use "spark-submit" for
           yarn-cluster mode. To make things easier, you are recommended to use the
           launching scripts under `analytics-zoo/scripts`.

           For other cluster modes, you are recommended to install and run analytics-zoo
           through pip, which is more convenient.
    :param cores: The number of cores to be used on each node. Default to be 2.
    :param memory: The memory allocated for each node. Default to be '2g'.
    :param num_nodes: The number of nodes to be used in the cluster. Default to be 1.
           For Spark local, num_nodes should always be 1 and you don't need to change it.
    :param init_ray_on_spark: Whether to launch Ray services across the cluster.
           Default to be False and in this case the Ray cluster would be launched lazily
           when Ray is involved in Project Orca.
    :param kwargs: The extra keyword arguments used for creating SparkContext and
           launching Ray if any.

    :return: An instance of SparkContext.
    :raises ValueError: If cluster_mode is "yarn-cluster" or is not a supported mode.
    :raises AssertionError: If num_nodes is not 1 in local mode, if no hadoop conf
            directory can be found for yarn mode, or if the detected Python location for
            yarn mode does not contain "envs".
    """
    def _pick(keys):
        # Only forward the keyword arguments the caller actually supplied.
        return {key: kwargs[key] for key in keys if key in kwargs}

    cluster_mode = cluster_mode.lower()
    spark_args = _pick(["conf", "spark_log_level", "redirect_spark_log"])

    if cluster_mode == "spark-submit":
        from zoo import init_nncontext
        sc = init_nncontext(**spark_args)
    elif cluster_mode == "local":
        assert num_nodes == 1, "For Spark local mode, num_nodes should be 1"
        os.environ["SPARK_DRIVER_MEMORY"] = memory
        spark_args.update(_pick(["python_location"]))
        from zoo import init_spark_on_local
        sc = init_spark_on_local(cores, **spark_args)
    elif cluster_mode.startswith("yarn"):  # yarn or yarn-client
        if cluster_mode == "yarn-cluster":
            raise ValueError(
                'For yarn-cluster mode, please set cluster_mode to "spark-submit" '
                'and submit the application via spark-submit instead')
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        if not hadoop_conf:
            assert "hadoop_conf" in kwargs, \
                "Directory path to hadoop conf not found for yarn-client mode. Please either " \
                "specify argument hadoop_conf or set the environment variable HADOOP_CONF_DIR"
            hadoop_conf = kwargs["hadoop_conf"]
        from zoo.util.utils import detect_python_location
        # /path/to/conda/envs/conda_name/bin/python
        python_location = detect_python_location()
        assert "envs" in python_location, \
            "You must use a conda environment for yarn-client mode"
        spark_args.update(_pick([
            "driver_cores", "driver_memory", "extra_executor_memory_for_ray",
            "extra_python_lib", "penv_archive", "additional_archive",
            "hadoop_user_name", "spark_yarn_archive", "jars"
        ]))
        from zoo import init_spark_on_yarn
        sc = init_spark_on_yarn(hadoop_conf=hadoop_conf,
                                conda_name=python_location.split("/")[-3],
                                num_executors=num_nodes,
                                executor_cores=cores,
                                executor_memory=memory,
                                **spark_args)
    elif cluster_mode == "standalone":
        spark_args.update(_pick([
            "driver_cores", "driver_memory", "extra_executor_memory_for_ray",
            "extra_python_lib", "jars", "master", "enable_numa_binding"
        ]))
        from zoo import init_spark_standalone
        sc = init_spark_standalone(num_executors=num_nodes,
                                   executor_cores=cores,
                                   executor_memory=memory,
                                   **spark_args)
    else:
        # str.format only fills "{}" placeholders; the previous "%s" was left as-is
        # and the offending mode never appeared in the message.
        raise ValueError(
            "cluster_mode can only be local, yarn-client, standalone or spark-submit, "
            "but got: {}".format(cluster_mode))

    ray_args = _pick([
        "redis_port", "password", "object_store_memory", "verbose", "env",
        "extra_params", "num_ray_nodes", "ray_node_cpu_cores"
    ])
    from zoo.ray import RayContext
    # The RayContext is always created; ray itself is only started on request.
    ray_ctx = RayContext(sc, **ray_args)
    if init_ray_on_spark:
        # 0 is the default value for driver_cores.
        ray_ctx.init(driver_cores=kwargs.get("driver_cores", 0))
    return sc
def main():
    """
    Train a torchvision ResNet-50 on an ImageNet-style folder with the zoo Estimator.

    The SparkContext is created in local mode when HADOOP_CONF_DIR is not set and on
    yarn otherwise. The dataset directory is expected to contain ``train`` and ``val``
    sub-directories readable by ``datasets.ImageFolder``.

    NOTE(review): several parsed options (--arch, --start-epoch, --print-freq, --resume,
    --evaluate, --pretrained, --world-size, --rank, --seed) are not used below; the
    model is always ``torchvision.models.resnet50()``.
    """
    parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
    parser.add_argument('data', metavar='DIR', help='path to dataset')
    parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
                        choices=model_names,
                        help='model architecture: ' + ' | '.join(model_names) +
                             ' (default: resnet18)')
    parser.add_argument('--epochs', default=90, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('-b', '--batch-size', default=256, type=int, metavar='N',
                        help='mini-batch size (default: 256), this is the total '
                             'batch size of all GPUs on the current node when '
                             'using Data Parallel or Distributed Data Parallel')
    parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                        metavar='LR', help='initial learning rate', dest='lr')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('-p', '--print-freq', default=10, type=int, metavar='N',
                        help='print frequency (default: 10)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                        help='use pre-trained model')
    parser.add_argument('--world-size', default=-1, type=int,
                        help='number of nodes for distributed training')
    parser.add_argument('--rank', default=-1, type=int,
                        help='node rank for distributed training')
    parser.add_argument('--seed', default=None, type=int,
                        help='seed for initializing training. ')
    parser.add_argument('--cores', default=4, type=int,
                        help='num of CPUs to use.')
    parser.add_argument('--nodes', default=1, type=int,
                        help='num of nodes to use.')
    parser.add_argument('--executor_memory', default='20g', type=str,
                        help='size of executor memory.')
    parser.add_argument('--driver_memory', default='20g', type=str,
                        help='size of driver memory.')
    parser.add_argument('--driver_cores', default=1, type=int,
                        help='num of driver cores to use.')
    args = parser.parse_args()

    hadoop_conf_dir = os.environ.get('HADOOP_CONF_DIR')
    if hadoop_conf_dir is None:
        # Honour --driver_memory in local mode as well (its default is '20g', the
        # value that used to be hard-coded here).
        sc = init_spark_on_local(cores=args.cores,
                                 conf={"spark.driver.memory": args.driver_memory})
    else:
        num_cores_per_executor = args.cores
        os.environ['ZOO_MKL_NUMTHREADS'] = str(num_cores_per_executor)
        os.environ['OMP_NUM_THREADS'] = str(num_cores_per_executor)
        sc = init_spark_on_yarn(
            hadoop_conf=hadoop_conf_dir,
            # The name of the created conda-env
            conda_name=detect_python_location().split("/")[-3],
            num_executors=args.nodes,
            executor_cores=num_cores_per_executor,
            executor_memory=args.executor_memory,
            driver_memory=args.driver_memory,
            driver_cores=args.driver_cores,
            conf={
                "spark.rpc.message.maxSize": "1024",
                "spark.task.maxFailures": "1",
                "spark.driver.extraJavaOptions": "-Dbigdl.failure.retryTimes=1"
            })

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)

    model = torchvision.models.resnet50()

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False)

    # 1281167 is presumably the ImageNet-1k training set size -- TODO confirm.
    iterationPerEpoch = int(math.ceil(float(1281167) / args.batch_size))
    # The Step schedule is built from 30 epochs' worth of iterations and 0.1.
    step = Step(iterationPerEpoch * 30, 0.1)
    zooOptimizer = SGD(args.lr, momentum=args.momentum, dampening=0.0,
                       leaningrate_schedule=step,
                       weightdecay=args.weight_decay)
    zooModel = TorchModel.from_pytorch(model)
    criterion = torch.nn.CrossEntropyLoss()
    zooCriterion = TorchLoss.from_pytorch(criterion)
    estimator = Estimator(zooModel, optim_methods=zooOptimizer)
    train_featureSet = FeatureSet.pytorch_dataloader(train_loader)
    test_featureSet = FeatureSet.pytorch_dataloader(val_loader)
    # Use the parsed --epochs (default 90) instead of a hard-coded 90, so the
    # command line option actually takes effect.
    estimator.train_minibatch(train_featureSet, zooCriterion,
                              end_trigger=MaxEpoch(args.epochs),
                              checkpoint_trigger=EveryEpoch(),
                              validation_set=test_featureSet,
                              validation_method=[Accuracy(), Top5Accuracy()])
def init_spark_standalone(self, num_executors, executor_cores, executor_memory="10g",
                          driver_cores=4, driver_memory="1g", master=None,
                          extra_executor_memory_for_ray=None, extra_python_lib=None,
                          conf=None, jars=None):
    """
    Create a SparkContext on a Spark standalone cluster.

    When no master is given, a master and a worker are started on the current node
    through the scripts under the bundled ``share/bin/standalone/sbin`` directory;
    otherwise the given, already running cluster is used. The exit status of the
    start scripts is not checked.

    :param num_executors: The number of executors.
    :param executor_cores: The number of cores for each executor.
    :param executor_memory: The memory for each executor. Default to be "10g".
    :param driver_cores: The number of cores for the driver. Default to be 4.
    :param driver_memory: The memory for the driver. Default to be "1g".
    :param master: The address (``spark://master:port``) of an existing standalone
           cluster. Default is None, in which case a cluster is started locally.
    :param extra_executor_memory_for_ray: If given, set as
           spark.executor.memoryOverhead.
    :param extra_python_lib: If given, passed to ``--py-files``.
    :param conf: User supplied Spark configurations, forwarded to init_spark_conf.
    :param jars: If given, passed to ``--jars``.
    :return: The object returned by init_nncontext.
    :raises AssertionError: If a given master does not start with "spark://".
    """
    import subprocess
    import pyspark
    from zoo.util.utils import get_node_ip
    from zoo.util.engine import get_analytics_zoo_classpath
    from bigdl.util.engine import get_bigdl_classpath

    if 'PYSPARK_PYTHON' not in os.environ:
        os.environ["PYSPARK_PYTHON"] = detect_python_location()

    if not master:
        pyspark_home = os.path.abspath(pyspark.__file__ + "/../")
        zoo_standalone_home = os.path.abspath(
            __file__ + "/../../share/bin/standalone")
        node_ip = get_node_ip()
        SparkRunner.standalone_env = {
            "SPARK_HOME": pyspark_home,
            "ZOO_STANDALONE_HOME": zoo_standalone_home,
            # If not set this, by default master is hostname but not ip,
            "SPARK_MASTER_HOST": node_ip
        }
        # The scripts installed from pip don't have execution permission
        # and need to first give them permission.
        # subprocess.call waits for the child and reaps it through Popen itself,
        # unlike a raw os.waitpid on the Popen pid.
        subprocess.call(["chmod", "-R", "+x", "{}/sbin".format(zoo_standalone_home)])
        # Start master
        subprocess.call("{}/sbin/start-master.sh".format(zoo_standalone_home),
                        shell=True, env=SparkRunner.standalone_env)
        master = "spark://{}:7077".format(node_ip)  # 7077 is the default port
        # Start worker
        subprocess.call(
            "{}/sbin/start-worker.sh {}".format(zoo_standalone_home, master),
            shell=True, env=SparkRunner.standalone_env)
    else:
        # A Spark standalone cluster has already been started by the user.
        assert master.startswith("spark://"), \
            "Please input a valid master address for your Spark standalone cluster: " \
            "spark://master:port"

    # Start pyspark-shell
    submit_args = " --master " + master
    submit_args = submit_args + \
        " --driver-cores {} --driver-memory {} --num-executors {}" \
        " --executor-cores {} --executor-memory {}".format(
            driver_cores, driver_memory, num_executors, executor_cores, executor_memory)
    if extra_python_lib:
        submit_args = submit_args + " --py-files {}".format(extra_python_lib)
    if jars:
        submit_args = submit_args + " --jars {}".format(jars)
    submit_args = submit_args + " pyspark-shell"
    os.environ['PYSPARK_SUBMIT_ARGS'] = submit_args

    zoo_bigdl_jar_path = ":".join(
        [get_analytics_zoo_classpath(), get_bigdl_classpath()])
    spark_conf = init_spark_conf(conf) \
        .set("spark.driver.cores", driver_cores) \
        .set("spark.driver.memory", driver_memory) \
        .set("spark.executor.instances", num_executors) \
        .set("spark.executor.cores", executor_cores) \
        .set("spark.cores.max", num_executors * executor_cores) \
        .set("spark.executorEnv.PYTHONHOME",
             "/".join(detect_python_location().split("/")[:-2]))
    if extra_executor_memory_for_ray:
        spark_conf.set("spark.executor.memoryOverhead", extra_executor_memory_for_ray)

    if spark_conf.contains("spark.executor.extraClassPath"):
        # Read the existing value from spark_conf, the object that was just checked.
        # The raw `conf` argument may be None or may not hold this key.
        spark_conf.set(
            "spark.executor.extraClassPath",
            "{}:{}".format(zoo_bigdl_jar_path,
                           spark_conf.get("spark.executor.extraClassPath")))
    else:
        spark_conf.set("spark.executor.extraClassPath", zoo_bigdl_jar_path)

    sc = init_nncontext(spark_conf,
                        spark_log_level=self.spark_log_level,
                        redirect_spark_log=self.redirect_spark_log)
    return sc