def init_ray_context_fixture():
    """Bring up a local Spark + Ray environment around a test, then tear it down.

    This is a generator: the code before the bare ``yield`` is the setup and
    the code after it is the teardown. Nothing is yielded to the consumer.
    (Presumably registered as a pytest fixture by a decorator outside this
    block -- confirm at the definition site.)

    Setup:
        * creates a local SparkContext with 4 cores and Spark log level
          ``"INFO"``;
        * builds a ``RayContext`` on that SparkContext with
          ``object_store_memory="1g"`` and calls ``init()`` on it.

    Teardown:
        * stops the Ray context, then stops the SparkContext.
    """
    from zoo import init_spark_on_local
    from zoo.ray import RayContext

    sc = init_spark_on_local(cores=4, spark_log_level="INFO")
    try:
        ray_ctx = RayContext(sc=sc, object_store_memory="1g")
        ray_ctx.init()
        try:
            yield
        finally:
            # Only stop Ray once init() has succeeded, as the original did.
            ray_ctx.stop()
    finally:
        # Always release the SparkContext, even when RayContext construction,
        # init() or stop() raises; otherwise it leaks into later tests.
        sc.stop()
def rayonspark_fixture():
    """Bring up a shared local Spark + Ray environment, then tear it down.

    This is a generator: the code before the bare ``yield`` is the setup and
    the code after it is the teardown. Nothing is yielded; instead the created
    objects are published through the module-level globals ``sc`` and
    ``ray_ctx`` so that other code in the module can reach them.
    (Presumably registered as a pytest fixture by a decorator outside this
    block -- confirm at the definition site.)

    Setup:
        * binds global ``sc`` to a local SparkContext with 8 cores and Spark
          log level ``"INFO"``;
        * binds global ``ray_ctx`` to a ``RayContext`` built on ``sc`` with
          ``object_store_memory="1g"`` and calls ``init()`` on it.

    Teardown:
        * stops the Ray context, then stops the SparkContext. The globals are
          left bound to the stopped objects, as before.
    """
    from zoo import init_spark_on_local
    from zoo.ray import RayContext

    global sc
    global ray_ctx

    sc = init_spark_on_local(cores=8, spark_log_level="INFO")
    try:
        ray_ctx = RayContext(sc=sc, object_store_memory="1g")
        ray_ctx.init()
        try:
            yield
        finally:
            # Only stop Ray once init() has succeeded, as the original did.
            ray_ctx.stop()
    finally:
        # Always release the SparkContext, even when RayContext construction,
        # init() or stop() raises; otherwise it leaks into later tests.
        sc.stop()
def test_local(self):
    """Start Ray on a local SparkContext and print each actor's hostname.

    Creates a 4-core local SparkContext, initialises a ``RayContext`` on it
    with ``object_store_memory="1g"``, launches four ``TestRay`` actors, and
    prints the list of values returned by their ``hostname`` method. The Ray
    context and the SparkContext are stopped afterwards.
    """

    @ray.remote
    class TestRay:
        def hostname(self):
            """Return ``socket.gethostname()`` of the process running this actor."""
            import socket
            return socket.gethostname()

    sc = init_spark_on_local(cores=4)
    try:
        ray_ctx = RayContext(sc=sc, object_store_memory="1g")
        ray_ctx.init()
        try:
            actors = [TestRay.remote() for _ in range(4)]
            print(ray.get([actor.hostname.remote() for actor in actors]))
        finally:
            # Stop Ray even if actor creation or ray.get() fails.
            ray_ctx.stop()
    finally:
        # Always release the SparkContext so a failure here does not leave
        # it running for whatever executes next in the same process.
        sc.stop()
def orca_data_fixture():
    """Bring up a local Spark + Ray environment for data tests, then tear it down.

    This is a generator: the code before the bare ``yield`` is the setup and
    the code after it is the teardown. Nothing is yielded; the Ray context is
    published through the module-level global ``ray_ctx``.
    (Presumably registered as a pytest fixture by a decorator outside this
    block -- confirm at the definition site.)

    Setup:
        * creates a local SparkContext with 4 cores and Spark log level
          ``"INFO"``;
        * reads ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` from the
          process environment;
        * when both are set, builds the ``RayContext`` with an ``env`` dict
          carrying those two variables; otherwise builds it without ``env``;
        * calls ``init()`` on the Ray context.

    Teardown:
        * stops the Ray context, then stops the SparkContext.
    """
    from zoo import init_spark_on_local
    from zoo.ray import RayContext

    global ray_ctx

    sc = init_spark_on_local(cores=4, spark_log_level="INFO")
    try:
        access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
        secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
        # The earlier code built the credential-carrying context and then
        # immediately replaced it with a plain one, so the credentials were
        # never forwarded. Build exactly one context, and only pass ``env``
        # when both values exist so no ``None`` entries are handed over.
        if access_key_id is not None and secret_access_key is not None:
            ray_ctx = RayContext(
                sc=sc,
                object_store_memory="1g",
                env={
                    "AWS_ACCESS_KEY_ID": access_key_id,
                    "AWS_SECRET_ACCESS_KEY": secret_access_key,
                },
            )
        else:
            ray_ctx = RayContext(sc=sc, object_store_memory="1g")
        ray_ctx.init()
        try:
            yield
        finally:
            # Only stop Ray once init() has succeeded, as the original did.
            ray_ctx.stop()
    finally:
        # Always release the SparkContext, even when setup or Ray teardown
        # raises; otherwise it leaks into later tests.
        sc.stop()
def orca_data_fixture():
    """Bring up a local Spark + Ray environment for data tests, then tear it down.

    This is a generator: the code before the bare ``yield`` is the setup and
    the code after it is the teardown. Nothing is yielded to the consumer.
    (Presumably registered as a pytest fixture by a decorator outside this
    block -- confirm at the definition site.)

    Setup:
        * sets ``ZooContext._orca_eager_mode`` to ``True`` (it is not restored
          on teardown);
        * creates a local SparkContext with 4 cores and Spark log level
          ``"INFO"``;
        * reads ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` from the
          process environment;
        * when both are set, builds the ``RayContext`` with an ``env`` dict
          carrying those two variables; otherwise builds it without ``env``;
        * calls ``init()`` on the Ray context.

    Teardown:
        * stops the Ray context, then stops the SparkContext.
    """
    from zoo import init_spark_on_local
    from zoo.ray import RayContext

    ZooContext._orca_eager_mode = True
    sc = init_spark_on_local(cores=4, spark_log_level="INFO")
    try:
        access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
        secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
        # Only forward the credentials when both exist, so no ``None``
        # entries are handed over in ``env``.
        if access_key_id is not None and secret_access_key is not None:
            ray_ctx = RayContext(
                sc=sc,
                object_store_memory="1g",
                env={
                    "AWS_ACCESS_KEY_ID": access_key_id,
                    "AWS_SECRET_ACCESS_KEY": secret_access_key,
                },
            )
        else:
            ray_ctx = RayContext(sc=sc, object_store_memory="1g")
        ray_ctx.init()
        try:
            yield
        finally:
            # Only stop Ray once init() has succeeded, as the original did.
            ray_ctx.stop()
    finally:
        # Always release the SparkContext, even when setup or Ray teardown
        # raises; otherwise it leaks into later tests.
        sc.stop()
format('json').mode("overwrite").save("/tmp/ray-pandas-example") # init ray context ray_ctx = RayContext(sc=sc, object_store_memory="5g") ray_ctx.init(object_store_memory="5g") # read data data_shard = zoo.xshard.pandas.read_json("/tmp/ray-pandas-example", ray_ctx) # collect data data = data_shard.collect() print("collected data :") print(data[0].head()) # repartition partitions = data_shard.get_partitions() print("get %d partitions" % len(partitions)) data_shard.repartition(2) new_partitions = data_shard.get_partitions() print("get %d partitions after repartition" % len(new_partitions)) # apply function on each element data_shards = data_shard.apply(process_feature, 6, 24) data2 = data_shards.collect() print("collected new data :") print(data2[0].head()) ray_ctx.stop() sc.stop()