def main(train_path, pred_path, n_pred, dt, target, time_limit_min):
    """Fit a TimeSequencePredictor on a CSV file and write its forecast to CSV.

    :param train_path: Path of the training CSV, read with ``pd.read_csv``.
    :param pred_path: Path the forecast CSV is written to (without index).
    :param n_pred: Number of future steps; used as ``future_seq_len`` and as
           the number of timestamps generated for the output.
    :param dt: Name of the datetime column in the training data.
    :param target: Name of the target column in the training data.
    :param time_limit_min: Number of minutes; converted to seconds and stored
           in the ``TRIALRUNNER_WALLTIME_LIMIT`` environment variable.
    """
    os.environ["TRIALRUNNER_WALLTIME_LIMIT"] = str(time_limit_min * 60)

    df_train = pd.read_csv(train_path)
    df_train[dt] = pd.to_datetime(df_train[dt])

    # Every column other than the datetime and target columns is an extra
    # feature. Keep the DataFrame's column order so runs are reproducible
    # (a set difference would yield an arbitrary order).
    extra_features_col = [col for col in df_train.columns
                          if col not in (dt, target)]
    if not extra_features_col:
        extra_features_col = None

    sc = init_spark_on_local(cores=mp.cpu_count(), spark_log_level="ERROR")
    try:
        ray_ctx = RayContext(sc=sc)
        ray_ctx.init()
        try:
            tsp = TimeSequencePredictor(dt_col=dt,
                                        target_col=target,
                                        extra_features_col=extra_features_col,
                                        future_seq_len=n_pred)
            # NOTE(review): num_samples is very large; presumably the
            # wall-time limit set above is what ends the search -- confirm.
            pipeline = tsp.fit(df_train,
                               resources_per_trial={"cpu": 4},
                               recipe=BayesRecipe(num_samples=100000))

            # Predict from the last two training rows.
            df_pred = pipeline.predict(df_train[-2:])
        finally:
            # Shut Ray down even when fitting or predicting fails.
            ray_ctx.stop()
    finally:
        sc.stop()

    # The first prediction row supplies the start timestamp (column `dt`) and,
    # in every column after the first, the predicted values.
    x_pred = pd.date_range(df_pred.iloc[0][dt],
                           periods=n_pred,
                           freq=pd.infer_freq(df_train[dt]))
    y_pred = df_pred.iloc[0][1:]
    df_pred = pd.DataFrame({dt: x_pred, target: y_pred})
    df_pred.to_csv(pred_path, index=False)
def automl_fixture():
    """Generator fixture: bring up local Spark plus Ray, yield, then stop both.

    Spark is started locally with 4 cores and INFO logging; the Ray context is
    created on top of it with a "1g" object store.
    """
    from zoo import init_spark_on_local
    from zoo.ray.util.raycontext import RayContext

    spark_context = init_spark_on_local(cores=4, spark_log_level="INFO")
    ray_context = RayContext(sc=spark_context, object_store_memory="1g")
    ray_context.init()

    yield

    # Teardown: Ray is stopped before the Spark context it was built on.
    ray_context.stop()
    spark_context.stop()
def rayonspark_fixture():
    """Generator fixture: start Ray on an 8-core local Spark, yield, then stop.

    The RayContext is bound to the module-level global ``ray_ctx``.
    """
    global ray_ctx

    from zoo import init_spark_on_local
    from zoo.ray import RayContext

    spark_context = init_spark_on_local(cores=8, spark_log_level="INFO")
    ray_ctx = RayContext(sc=spark_context, object_store_memory="1g")
    ray_ctx.init()

    yield

    # Teardown: Ray first, then the underlying Spark context.
    ray_ctx.stop()
    spark_context.stop()
def test_local(self):
    """Start Ray on local Spark, query actors, and check Ray processes exit."""
    actor_count = 4
    spark_ctx = init_spark_on_local(cores=actor_count)
    ray_ctx = RayContext(sc=spark_ctx)
    ray_ctx.init()

    workers = [TestRay.remote() for _ in range(actor_count)]
    # Each hostname is fetched with its own ray.get call, one actor at a time.
    hostnames = []
    for worker in workers:
        hostnames.append(ray.get(worker.hostname.remote()))
    print(hostnames)

    ray_ctx.stop()
    spark_ctx.stop()

    # Every pid recorded by the process monitor must be gone after shutdown.
    for info in ray_ctx.ray_processesMonitor.process_infos:
        for pid in info.pids:
            assert not psutil.pid_exists(pid)
def test_local(self):
    """Start Ray on a 4-core local Spark and print each actor's hostname."""

    # Remote actor that reports the hostname of the machine it runs on.
    @ray.remote
    class TestRay:
        def hostname(self):
            import socket
            return socket.gethostname()

    spark_ctx = init_spark_on_local(cores=4)
    ray_ctx = RayContext(sc=spark_ctx, object_store_memory="1g")
    ray_ctx.init()

    workers = [TestRay.remote() for _ in range(4)]
    # All hostname calls are submitted first and resolved by one ray.get.
    pending = [worker.hostname.remote() for worker in workers]
    print(ray.get(pending))

    ray_ctx.stop()
    spark_ctx.stop()
def orca_data_fixture():
    """Generator fixture: start Ray on a 4-core local Spark, yield, then stop.

    The RayContext is bound to the module-level global ``ray_ctx``. When both
    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` are set in the
    environment they are forwarded to RayContext through its ``env`` argument;
    otherwise no ``env`` is passed.
    """
    global ray_ctx

    from zoo import init_spark_on_local
    from zoo.ray import RayContext

    sc = init_spark_on_local(cores=4, spark_log_level="INFO")
    access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
    secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")

    # Build the context exactly once. Previously the context carrying the AWS
    # credentials was immediately replaced by one without them, so the
    # credentials were never forwarded.
    ray_kwargs = {"object_store_memory": "1g"}
    if access_key_id is not None and secret_access_key is not None:
        ray_kwargs["env"] = {
            "AWS_ACCESS_KEY_ID": access_key_id,
            "AWS_SECRET_ACCESS_KEY": secret_access_key,
        }
    ray_ctx = RayContext(sc=sc, **ray_kwargs)
    ray_ctx.init()

    yield

    # Teardown: Ray first, then the underlying Spark context.
    ray_ctx.stop()
    sc.stop()
def orca_data_fixture():
    """Generator fixture: start Ray on a 4-core local Spark, yield, then stop.

    Sets ``ZooContext._orca_eager_mode`` to True before starting Spark. AWS
    credentials are forwarded to RayContext via ``env`` only when both
    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` are present in the
    environment.
    """
    from zoo import init_spark_on_local
    from zoo.ray import RayContext

    ZooContext._orca_eager_mode = True
    spark_ctx = init_spark_on_local(cores=4, spark_log_level="INFO")

    key_id = os.getenv("AWS_ACCESS_KEY_ID")
    secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")

    # Collect the RayContext keyword arguments; `env` is optional.
    context_kwargs = {"sc": spark_ctx, "object_store_memory": "1g"}
    if not (key_id is None or secret_key is None):
        context_kwargs["env"] = {
            "AWS_ACCESS_KEY_ID": key_id,
            "AWS_SECRET_ACCESS_KEY": secret_key
        }
    ray_ctx = RayContext(**context_kwargs)
    ray_ctx.init()

    yield

    # Teardown: Ray first, then the underlying Spark context.
    ray_ctx.stop()
    spark_ctx.stop()
def test_local(self):
    """Start Ray on local Spark twice in a row and check its processes exit.

    A RayContext is started, used and stopped; after a 3 second pause a second
    RayContext is started on the same SparkContext, used and stopped. The pids
    recorded by the second context's process monitor must no longer exist.
    """
    actor_count = 4
    spark_ctx = init_spark_on_local(cores=actor_count)

    # First round.
    ray_ctx = RayContext(sc=spark_ctx, object_store_memory="1g")
    ray_ctx.init()
    workers = [TestRay.remote() for _ in range(actor_count)]
    pending = [worker.hostname.remote() for worker in workers]
    print(ray.get(pending))
    ray_ctx.stop()

    time.sleep(3)
    # repeat
    print("-------------------first repeat begin!------------------")

    # Second round, on the same SparkContext.
    ray_ctx = RayContext(sc=spark_ctx, object_store_memory="1g")
    ray_ctx.init()
    workers = [TestRay.remote() for _ in range(actor_count)]
    pending = [worker.hostname.remote() for worker in workers]
    print(ray.get(pending))
    ray_ctx.stop()

    spark_ctx.stop()
    time.sleep(3)

    # Only the most recent context's monitored pids are checked.
    for info in ray_ctx.ray_processesMonitor.process_infos:
        for pid in info.pids:
            assert not psutil.pid_exists(pid)
| (df['hours'] == 0)).astype(int) return df if __name__ == "__main__": parser = OptionParser() parser.add_option("-f", type=str, dest="file_path", help="The file path to be read") (options, args) = parser.parse_args(sys.argv) # Prepare csv files df = pd.read_csv(options.file_path) sc = init_spark_on_local(cores="*") sqlContext = SQLContext(sc) num_nodes, num_cores = get_node_and_core_number() df_spark = sqlContext.createDataFrame(df) df_spark.printSchema() df_spark.repartition(num_cores).write.\ format('json').mode("overwrite").save("/tmp/ray-pandas-example") # init ray context ray_ctx = RayContext(sc=sc, object_store_memory="5g") ray_ctx.init(object_store_memory="5g") # read data data_shard = zoo.xshard.pandas.read_json("/tmp/ray-pandas-example", ray_ctx)
# Script excerpt: parse the command line, start Spark (on YARN or locally),
# start Ray on top of it, then initialise the model and gradient buffers.
# `parser`, `H` and `D` are defined outside this excerpt.
args = parser.parse_args()
# A Hadoop conf directory selects YARN; otherwise Spark runs locally.
if args.hadoop_conf:
    sc = init_spark_on_yarn(
        hadoop_conf=args.hadoop_conf,
        conda_name=args.conda_name,
        num_executor=args.slave_num,
        executor_cores=args.executor_cores,
        executor_memory=args.executor_memory,
        driver_memory=args.driver_memory,
        driver_cores=args.driver_cores,
        extra_executor_memory_for_ray=args.extra_executor_memory_for_ray)
    ray_ctx = RayContext(sc=sc, object_store_memory=args.object_store_memory)
else:
    # Local mode reuses the driver core count as the local core count.
    sc = init_spark_on_local(cores=args.driver_cores)
    ray_ctx = RayContext(sc=sc, object_store_memory=args.object_store_memory)
ray_ctx.init()

batch_size = args.batch_size
# Run the reinforcement learning.
running_reward = None
batch_num = 1
model = {}
# "Xavier" initialization.
# W1 is an (H, D) matrix scaled by 1/sqrt(D); W2 is a length-H vector scaled
# by 1/sqrt(H).
model["W1"] = np.random.randn(H, D) / np.sqrt(D)
model["W2"] = np.random.randn(H) / np.sqrt(H)
# Update buffers that add up gradients over a batch.
# One zero array per model parameter, with the same shape as that parameter.
grad_buffer = {k: np.zeros_like(v) for k, v in model.items()}
# Update the rmsprop memory.
parser.add_argument("--object_store_memory", type=str, default="4g",
                    help="The memory to store data on local. "
                         "You can change it depending on your own cluster setting.")


if __name__ == "__main__":
    args = parser.parse_args()

    # A Hadoop conf directory selects YARN; otherwise Spark runs locally with
    # its default settings.
    if args.hadoop_conf:
        sc = init_spark_on_yarn(
            hadoop_conf=args.hadoop_conf,
            conda_name=args.conda_name,
            num_executor=args.slave_num,
            executor_cores=args.executor_cores,
            executor_memory=args.executor_memory,
            driver_memory=args.driver_memory,
            driver_cores=args.driver_cores,
            extra_executor_memory_for_ray=args.extra_executor_memory_for_ray)
    else:
        sc = init_spark_on_local()

    # Ray is started the same way on either backend, so do it once here
    # instead of repeating it in both branches.
    ray_ctx = RayContext(sc=sc, object_store_memory=args.object_store_memory)
    ray_ctx.init()

    # Hand the running Ray context to the Horovod trainer and run the
    # training function.
    runner = HorovodRayTrainer(ray_ctx)
    runner.train(func=run_horovod)
def estimator_for_spark_fixture():
    """Generator fixture: yield a 4-core local SparkContext, then stop it."""
    from zoo import init_spark_on_local

    spark_context = init_spark_on_local(cores=4, spark_log_level="INFO")
    yield spark_context
    # Teardown after the consumer of the fixture is done.
    spark_context.stop()
# Script setup: start Ray on a local Spark context and build the A2C trainer
# configuration for the MovieEnv environment.
import ray
import ray.rllib.agents.a3c.a2c as a2c
from ray.tune.logger import pretty_print
from pythonlearn.tfmodels.rlrec.movie_env import *
import time
import sys
#ray.init()
from zoo.ray import RayContext
from zoo import init_spark_on_local

# Ray is launched through RayContext on local Spark rather than by calling
# ray.init() directly (see the disabled call above).
spark_conf = {"spark.executor.memory": "24g", "spark.driver.memory": "24g"}
sc = init_spark_on_local(cores=8, conf=spark_conf)
ray_ctx = RayContext(sc=sc, object_store_memory="4g")
ray_ctx.init()

# Environment settings; EnvConfig and MovieEnv are presumably provided by the
# star import from movie_env -- confirm.
env_conf= EnvConfig({'user_max': 6040, 'movie_max': 3952, 'ncf_dim':20 })

# Start from a copy of the A2C defaults so the library's dict is not mutated.
trainer_conf = a2c.A2C_DEFAULT_CONFIG.copy()
trainer_conf["env_config"] = env_conf._values
# print(sys.argv[1])
# lr = float(sys.argv[1]) * 0.0005
lr = 0.0005
trainer_conf["lr"] = lr
trainer_conf["num_gpus"] = 0
trainer_conf["num_workers"] = 8
trainer_conf["env"]=MovieEnv
def init_orca_context(cluster_mode="local", cores=2, memory="2g", num_nodes=1,
                      init_ray_on_spark=False, **kwargs):
    """
    Creates or gets a SparkContext for different Spark cluster modes (and launch Ray services
    across the cluster if necessary).

    :param cluster_mode: The mode for the Spark cluster. One of "local", "yarn-client",
           "standalone" and "spark-submit". Default to be "local". The value is
           compared case-insensitively.

           For "spark-submit", you are supposed to use spark-submit to submit the application.
           In this case, please set the Spark configurations through command line options or
           the properties file. You need to use "spark-submit" for yarn-cluster mode.
           To make things easier, you are recommended to use the launching scripts under
           `analytics-zoo/scripts`.

           For other cluster modes, you are recommended to install and run analytics-zoo through
           pip, which is more convenient.
    :param cores: The number of cores to be used on each node. Default to be 2.
    :param memory: The memory allocated for each node. Default to be '2g'.
    :param num_nodes: The number of nodes to be used in the cluster. Default to be 1.
           For Spark local, num_nodes should always be 1 and you don't need to change it.
    :param init_ray_on_spark: Whether to launch Ray services across the cluster.
           Default to be False and in this case the Ray cluster would be launched lazily when
           Ray is involved in Project Orca.
    :param kwargs: The extra keyword arguments used for creating SparkContext and
           launching Ray if any.

    :return: An instance of SparkContext.
    :raises ValueError: If cluster_mode is "yarn-cluster" or is not one of the supported modes.
    :raises AssertionError: If num_nodes is not 1 in local mode, if no hadoop conf directory
            can be found for yarn-client mode, or if the python interpreter in use for
            yarn-client mode is not located inside a conda environment.
    """
    def _forwarded(keys):
        # Subset of kwargs restricted to the given keys (absent keys are skipped).
        return {key: kwargs[key] for key in keys if key in kwargs}

    cluster_mode = cluster_mode.lower()
    # Options accepted by every mode.
    spark_args = _forwarded(["conf", "spark_log_level", "redirect_spark_log"])

    if cluster_mode == "spark-submit":
        from zoo import init_nncontext
        sc = init_nncontext(**spark_args)
    elif cluster_mode == "local":
        assert num_nodes == 1, "For Spark local mode, num_nodes should be 1"
        os.environ["SPARK_DRIVER_MEMORY"] = memory
        spark_args.update(_forwarded(["python_location"]))
        from zoo import init_spark_on_local
        sc = init_spark_on_local(cores, **spark_args)
    elif cluster_mode.startswith("yarn"):  # yarn or yarn-client
        if cluster_mode == "yarn-cluster":
            raise ValueError('For yarn-cluster mode, please set cluster_mode to "spark-submit" '
                             'and submit the application via spark-submit instead')
        # The environment variable takes precedence over the hadoop_conf argument.
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        if not hadoop_conf:
            assert "hadoop_conf" in kwargs,\
                "Directory path to hadoop conf not found for yarn-client mode. Please either " \
                "specify argument hadoop_conf or set the environment variable HADOOP_CONF_DIR"
            hadoop_conf = kwargs["hadoop_conf"]
        from zoo.util.utils import detect_python_location
        python_location = detect_python_location()  # /path/to/conda/envs/conda_name/bin/python
        assert "envs" in python_location, "You must use a conda environment for yarn-client mode"
        spark_args.update(_forwarded([
            "driver_cores", "driver_memory", "extra_executor_memory_for_ray",
            "extra_python_lib", "penv_archive", "additional_archive",
            "hadoop_user_name", "spark_yarn_archive", "jars"]))
        from zoo import init_spark_on_yarn
        sc = init_spark_on_yarn(hadoop_conf=hadoop_conf,
                                # Third path component from the end is the conda env name.
                                conda_name=python_location.split("/")[-3],
                                num_executors=num_nodes, executor_cores=cores,
                                executor_memory=memory, **spark_args)
    elif cluster_mode == "standalone":
        spark_args.update(_forwarded([
            "driver_cores", "driver_memory", "extra_executor_memory_for_ray",
            "extra_python_lib", "jars", "master", "enable_numa_binding"]))
        from zoo import init_spark_standalone
        sc = init_spark_standalone(num_executors=num_nodes, executor_cores=cores,
                                   executor_memory=memory, **spark_args)
    else:
        # The message previously combined a "%s" placeholder with str.format(),
        # so the offending value was never interpolated.
        raise ValueError("cluster_mode can only be local, yarn-client, standalone or "
                         "spark-submit, but got: {}".format(cluster_mode))

    ray_args = _forwarded(["redis_port", "password", "object_store_memory", "verbose",
                           "env", "extra_params", "num_ray_nodes", "ray_node_cpu_cores"])
    from zoo.ray import RayContext
    # The RayContext is always created; it is only initialised here when
    # init_ray_on_spark is set.
    ray_ctx = RayContext(sc, **ray_args)
    if init_ray_on_spark:
        driver_cores = kwargs.get("driver_cores", 0)  # 0 is the default value.
        ray_ctx.init(driver_cores=driver_cores)
    return sc
"You can change it depending on your own cluster setting.") parser.add_argument("--workers_per_node", type=int, default=1, help="The number of workers to run on each node") parser.add_argument("--local_cores", type=int, default=4, help="The number of cores while running on local mode") args = parser.parse_args() if args.hadoop_conf: sc = init_spark_on_yarn( hadoop_conf=args.hadoop_conf, conda_name=args.conda_name, num_executors=args.num_executors, executor_cores=args.executor_cores, executor_memory=args.executor_memory, driver_memory=args.driver_memory, driver_cores=args.driver_cores, extra_executor_memory_for_ray=args.extra_executor_memory_for_ray) ray_ctx = RayContext(sc=sc, object_store_memory=args.object_store_memory) ray_ctx.init() else: sc = init_spark_on_local(cores=args.local_cores) ray_ctx = RayContext(sc=sc, object_store_memory=args.object_store_memory) ray_ctx.init() train_example(workers_per_node=args.workers_per_node)