def setup(self, func_for_row, func_for_rows=None): if self.is_setup: raise ValueError("setup can be only invoke once") self.is_setup = True import ray from pyjava.rayfix import RayWrapper rayw = RayWrapper() if not self.is_in_mlsql: if func_for_rows is not None: func = ray.remote(func_for_rows) return ray.get(func.remote(self.mock_data)) else: func = ray.remote(func_for_row) def iter_all(rows): return [ray.get(func.remote(row)) for row in rows] iter_all_func = ray.remote(iter_all) return ray.get(iter_all_func.remote(self.mock_data)) buffer = [] for server_info in self.build_servers_in_ray(): server = rayw.get_actor(server_info.server_id) rci = ray.get(server.connect_info.remote()) buffer.append(rci) server.serve.remote(func_for_row, func_for_rows) items = [vars(server) for server in buffer] self.python_context.build_result(items, 1024) return buffer
def data_servers_in_ray(self):
    """Lazily yield the connect info of each Ray data server.

    For every id in ``self.server_ids_in_ray`` the actor is looked up through
    ``RayWrapper.get_actor`` and the result of its ``connect_info`` remote
    call is yielded. Nothing is imported or contacted until the generator is
    iterated.
    """
    import ray
    from pyjava.rayfix import RayWrapper

    actor_lookup = RayWrapper()
    for actor_name in self.server_ids_in_ray:
        actor = actor_lookup.get_actor(actor_name)
        pending = actor.connect_info.remote()
        yield ray.get(pending)
def build_servers_in_ray(self):
    """Create one ``RayDataServer`` actor per Java server and collect connect infos.

    ``self.server_ids_in_ray`` and ``self.servers`` are paired with ``zip``;
    each pair becomes a named, detached actor with ``max_concurrency=2``.
    The actor's ``connect_info`` is fetched and appended to the result.
    When ``self.is_dev`` is truthy the ids and attribute dicts are logged at
    debug level.

    Returns:
        List of connect-info objects, in the order of the zipped inputs.
    """
    from pyjava.rayfix import RayWrapper
    from pyjava.api.serve import RayDataServer

    ray = RayWrapper()
    connect_infos = []
    for server_id, java_server in zip(self.server_ids_in_ray, self.servers):
        actor_factory = ray.options(
            RayDataServer,
            name=server_id,
            detached=True,
            max_concurrency=2,
        )
        actor = actor_factory.remote(
            server_id, java_server, 0, java_server.timezone)
        info = ray.get(actor.connect_info.remote())
        if self.is_dev:
            java_server_repr = str(vars(java_server))
            info_repr = str(vars(info))
            message = "build RayDataServer server_id:{} java_server: {} servers:{}".format(
                server_id, java_server_repr, info_repr)
            logging.debug(message)
        connect_infos.append(info)
    return connect_infos
def setup(self, func_for_row, func_for_rows=None): if self.is_setup: raise ValueError("setup can be only invoke once") self.is_setup = True is_data_mode = "dataMode" in self.conf() and self.conf( )["dataMode"] == "data" if not is_data_mode: raise Exception(''' Please setup dataMode as data instead of model. Try run: `!python conf "dataMode=data"` or add comment like: `#%dataMode=data` if you are in notebook. ''') import ray from pyjava.rayfix import RayWrapper rayw = RayWrapper() if not self.is_in_mlsql: if func_for_rows is not None: func = ray.remote(func_for_rows) return ray.get(func.remote(self.mock_data)) else: func = ray.remote(func_for_row) def iter_all(rows): return [ray.get(func.remote(row)) for row in rows] iter_all_func = ray.remote(iter_all) return ray.get(iter_all_func.remote(self.mock_data)) buffer = [] for server_info in self.build_servers_in_ray(): server = rayw.get_actor(server_info.server_id) rci = ray.get(server.connect_info.remote()) buffer.append(rci) server.serve.remote(func_for_row, func_for_rows) items = [vars(server) for server in buffer] self.python_context.build_result(items, 1024) return buffer
def connect(_context, url, **kwargs):
    """Resolve the Python context and (re)connect Ray according to ``url``.

    Args:
        _context: Either a ``PythonContext`` or a dict (typically
            ``globals()``). A dict without a ``'context'`` key means the code
            is not running in MLSQL: a fresh ``PythonContext`` is created and
            its ``rayContext.is_in_mlsql`` is set to ``False``.
        url: ``"local"`` to restart a local Ray instance in the ``default``
            namespace, ``None`` to leave Ray untouched, anything else is
            passed to ``ray.init``.
        **kwargs: Forwarded to ``ray.init`` for non-local URLs.

    Returns:
        ``context.rayContext`` of the resolved context.

    Raises:
        Exception: If ``_context`` is neither a ``PythonContext`` nor a dict,
            or if ``url == "local"`` with a Ray version below 1.6.0.
    """
    if not isinstance(_context, (PythonContext, dict)):
        raise Exception(
            "context is not detect. make sure it's in globals().")

    if isinstance(_context, PythonContext):
        context = _context
    elif 'context' in _context:
        context = _context['context']
    else:
        # We are not in MLSQL.
        context = PythonContext("", [], {"pythonMode": "ray"})
        context.rayContext.is_in_mlsql = False

    if url is None:
        return context.rayContext

    from pyjava.rayfix import RayWrapper
    ray = RayWrapper()

    if url == "local":
        if ray.ray_version < StrictVersion('1.6.0'):
            raise Exception("URL:local is only support in ray >= 1.6.0")
        # Always restart, regardless of whether Ray is already initialized.
        ray.ray_instance.shutdown()
        ray.ray_instance.init(namespace="default")
        return context.rayContext

    is_udf_client = context.conf.get("UDF_CLIENT")
    if is_udf_client is None:
        # No UDF_CLIENT setting: reconnect on every call.
        ray.shutdown()
        ray.init(url, **kwargs)
    elif is_udf_client and url not in RayContext.conn_cache:
        # Truthy UDF_CLIENT: connect once per URL and remember it.
        ray.init(url, **kwargs)
        RayContext.conn_cache[url] = 1
    return context.rayContext
def __init__(self, server_id, java_server, port=0, timezone="Asia/Harbin"):
    """Create and bind the ``OnceServer`` backing this data server.

    Args:
        server_id: Identifier stored on the instance.
        java_server: Java-side server description; its ``timezone`` is what
            the ``OnceServer`` is constructed with.
        port: Port passed to ``OnceServer``.
        timezone: Stored as ``self.timezone``.
            NOTE(review): this value is not the one given to ``OnceServer``
            (that uses ``java_server.timezone``) - confirm this is intended.

    Raises:
        Exception: Whatever ``self.server.bind()`` raises; the traceback is
            printed before the exception is re-raised.
    """
    bind_address = RayWrapper().get_address()
    self.server = OnceServer(bind_address, port, java_server.timezone)
    try:
        bound = self.server.bind()
        (rel_host, rel_port) = bound
    except Exception as e:
        print(traceback.format_exc())
        raise e

    self.host, self.port = rel_host, rel_port
    self.timezone = timezone
    self.server_id = server_id
    self.java_server = java_server
    self.is_dev = utils.is_dev()
def connect(_context, url, **kwargs):
    """Resolve the Python context and reconnect Ray to ``url``.

    Args:
        _context: Either a ``PythonContext`` or a dict (typically
            ``globals()``). A dict without a ``'context'`` key means the code
            is not running in MLSQL: a fresh ``PythonContext`` is created and
            its ``rayContext.is_in_mlsql`` is set to ``False``.
        url: Ray address; when not ``None`` Ray is shut down and
            re-initialized with it.
        **kwargs: Forwarded to ``ray.init``.

    Returns:
        ``context.rayContext`` of the resolved context.

    Raises:
        Exception: If ``_context`` is neither a ``PythonContext`` nor a dict.
    """
    if not isinstance(_context, (PythonContext, dict)):
        raise Exception("context is not set")

    if isinstance(_context, PythonContext):
        context = _context
    elif 'context' in _context:
        context = _context['context']
    else:
        # We are not in MLSQL.
        context = PythonContext("", [], {"pythonMode": "ray"})
        context.rayContext.is_in_mlsql = False

    if url is None:
        return context.rayContext

    from pyjava.rayfix import RayWrapper
    ray = RayWrapper()
    ray.shutdown()
    ray.init(url, **kwargs)
    return context.rayContext
import os
import sys

import pandas as pd

# Make the in-repo pyjava package importable before importing from it.
sys.path.append("../../")

from pyjava.api.mlsql import DataServer
from pyjava.api.serve import RayDataServer
from pyjava.rayfix import RayWrapper

os.environ["ARROW_PRE_0_15_IPC_FORMAT"] = "1"

# Attach to an already running Ray cluster.
ray = RayWrapper()
ray.init(address='auto', _redis_password='******')

sample_rows = [[1, 2, 3, 4], [2, 3, 4, 5]]
ddata = pd.DataFrame(sample_rows)

# Start one named data-server actor and print how to reach it.
server_timezone = "Asia/Harbin"
server_id = "wow1"
java_server = DataServer("127.0.0.1", 11111, server_timezone)
actor_factory = RayDataServer.options(name=server_id, max_concurrency=2)
rds = actor_factory.remote(server_id, java_server, 0, server_timezone)
connect_info = ray.get(rds.connect_info.remote())
print(connect_info)


def echo(row):
    """Return ``row`` unchanged."""
    return row