def fetch_and_execute_function_to_run(self, key):
    """Run on arbitrary function on the worker.

    Args:
        key: The Redis key under which the exported function (and its
            driver metadata) was stored.
    """
    (driver_id, serialized_function,
     run_on_other_drivers) = self.redis_client.hmget(
         key, ["driver_id", "function", "run_on_other_drivers"])

    # BUG FIX: redis returns bytes on Python 3, so comparing the raw
    # value to the str "False" was always False and the guard below
    # never fired -- exports marked run_on_other_drivers=False were
    # still executed by other drivers. Decode before comparing.
    if (utils.decode(run_on_other_drivers) == "False"
            and self.worker.mode == ray.SCRIPT_MODE
            and driver_id != self.worker.task_driver_id.id()):
        return

    try:
        # Deserialize the function.
        function = pickle.loads(serialized_function)
        # Run the function.
        function({"worker": self.worker})
    except Exception:
        # If an exception was thrown when the function was run, we record
        # the traceback and notify the scheduler of the failure.
        traceback_str = traceback.format_exc()
        # `function` may not exist (unpickling failed) or may lack a
        # __name__, so fall back to the empty string for the report.
        name = function.__name__ if ("function" in locals() and hasattr(
            function, "__name__")) else ""
        utils.push_error_to_driver(
            self.worker,
            ray_constants.FUNCTION_TO_RUN_PUSH_ERROR,
            traceback_str,
            driver_id=driver_id,
            data={"name": name})
def fetch_and_execute_function_to_run(self, key):
    """Run on arbitrary function on the worker."""
    fields = ["job_id", "function", "run_on_other_drivers"]
    job_id, pickled_fn, run_on_other_drivers = self.redis_client.hmget(
        key, fields)

    # Skip exports that came from a different job unless the exporter
    # explicitly asked for them to run on every driver.
    is_foreign = job_id != self.worker.current_job_id.binary()
    if (utils.decode(run_on_other_drivers) == "False"
            and self.worker.mode == ray.SCRIPT_MODE and is_foreign):
        return

    try:
        # FunctionActorManager may call pickle.loads at the same time.
        # Importing the same module in different threads causes deadlock.
        with self.worker.function_actor_manager.lock:
            fn = pickle.loads(pickled_fn)
            fn({"worker": self.worker})
    except Exception:
        # Record the traceback and notify the scheduler of the failure.
        utils.push_error_to_driver(
            self.worker,
            ray_constants.FUNCTION_TO_RUN_PUSH_ERROR,
            traceback.format_exc(),
            job_id=ray.JobID(job_id))
def fetch_and_register_remote_function(self, key):
    """Import a remote function.

    Fetches the serialized function (and its metadata) from Redis under
    `key`, unpickles it, and records it in the local execution tables so
    tasks referencing this function ID can run. On unpickle failure a
    raising placeholder is registered instead and the error is pushed to
    the driver.
    """
    (job_id_str, function_id_str, function_name, serialized_function,
     module, max_calls) = self._worker.redis_client.hmget(key, [
         "job_id", "function_id", "function_name", "function", "module",
         "max_calls"
     ])
    function_id = ray.FunctionID(function_id_str)
    job_id = ray.JobID(job_id_str)
    function_name = decode(function_name)
    max_calls = int(max_calls)
    module = decode(module)

    # This function is called by ImportThread. This operation needs to be
    # atomic. Otherwise, there is race condition. Another thread may use
    # the placeholder function registered below before the real function
    # is ready.
    with self.lock:
        self._num_task_executions[job_id][function_id] = 0

        try:
            function = pickle.loads(serialized_function)
        except Exception:

            # Placeholder accepts any arguments so callers get this
            # message rather than a confusing TypeError.
            def f(*args, **kwargs):
                raise RuntimeError(
                    "This function was not imported properly.")

            # Use a placeholder method when function pickled failed
            self._function_execution_info[job_id][function_id] = (
                FunctionExecutionInfo(
                    function=f,
                    function_name=function_name,
                    max_calls=max_calls))
            # If an exception was thrown when the remote function was
            # imported, we record the traceback and notify the scheduler
            # of the failure.
            traceback_str = format_error_message(traceback.format_exc())
            # Log the error message.
            push_error_to_driver(
                self._worker,
                ray_constants.REGISTER_REMOTE_FUNCTION_PUSH_ERROR,
                "Failed to unpickle the remote function '{}' with "
                "function ID {}. Traceback:\n{}".format(
                    function_name, function_id.hex(), traceback_str),
                job_id=job_id)
        else:
            # The below line is necessary. Because in the driver process,
            # if the function is defined in the file where the python
            # script was started from, its module is `__main__`.
            # However in the worker process, the `__main__` module is a
            # different module, which is `default_worker.py`
            function.__module__ = module
            self._function_execution_info[job_id][function_id] = (
                FunctionExecutionInfo(
                    function=function,
                    function_name=function_name,
                    max_calls=max_calls))
            # Add the function to the function table.
            self._worker.redis_client.rpush(
                b"FunctionTable:" + function_id.binary(),
                self._worker.worker_id)
def fetch_and_execute_function_to_run(self, key):
    """Run on arbitrary function on the worker."""
    from ray.worker import SCRIPT_MODE, SILENT_MODE

    driver_id, pickled_fn = self.redis_client.hmget(
        key, ["driver_id", "function"])

    is_driver = self.worker.mode in [SCRIPT_MODE, SILENT_MODE]
    if is_driver and driver_id != self.worker.task_driver_id.id():
        # This export was from a different driver and there's no need for
        # this driver to import it.
        return

    function = None
    try:
        function = pickle.loads(pickled_fn)
        function({"worker": self.worker})
    except Exception:
        # Record the traceback and notify the scheduler of the failure.
        traceback_str = traceback.format_exc()
        # Report the function's name when it was unpickled successfully
        # and has one; otherwise fall back to the empty string.
        if function is not None:
            name = getattr(function, "__name__", "")
        else:
            name = ""
        utils.push_error_to_driver(
            self.worker,
            ray_constants.FUNCTION_TO_RUN_PUSH_ERROR,
            traceback_str,
            driver_id=driver_id,
            data={"name": name})
def fetch_and_execute_function_to_run(self, key):
    """Run on arbitrary function on the worker."""
    (driver_id, pickled_fn,
     run_on_other_drivers) = self.redis_client.hmget(
         key, ["driver_id", "function", "run_on_other_drivers"])

    # Ignore exports from another driver unless they were marked to run
    # everywhere.
    foreign = driver_id != self.worker.task_driver_id.binary()
    if (utils.decode(run_on_other_drivers) == "False"
            and self.worker.mode == ray.SCRIPT_MODE and foreign):
        return

    try:
        fn = pickle.loads(pickled_fn)
        fn({"worker": self.worker})
    except Exception:
        # Record the traceback and notify the scheduler of the failure.
        utils.push_error_to_driver(
            self.worker,
            ray_constants.FUNCTION_TO_RUN_PUSH_ERROR,
            traceback.format_exc(),
            driver_id=ray.DriverID(driver_id))
def _load_actor_class_from_gcs(self, job_id, function_descriptor):
    """Load actor class from GCS.

    Args:
        job_id: ID of the job that exported the actor class.
        function_descriptor: Descriptor whose function_id identifies the
            exported class.

    Returns:
        The unpickled actor class, or a fake stand-in class (that raises
        on use) if unpickling failed.
    """
    key = (b"ActorClass:" + job_id.binary() + b":" +
           function_descriptor.function_id.binary())
    # Wait for the actor class key to have been imported by the
    # import thread. TODO(rkn): It shouldn't be possible to end
    # up in an infinite loop here, but we should push an error to
    # the driver if too much time is spent here.
    while key not in self.imported_actor_classes:
        time.sleep(0.001)

    # Fetch raw data from GCS.
    (job_id_str, class_name, module, pickled_class,
     actor_method_names) = self._worker.redis_client.hmget(
         key,
         ["job_id", "class_name", "module", "class", "actor_method_names"])

    class_name = ensure_str(class_name)
    module_name = ensure_str(module)
    job_id = ray.JobID(job_id_str)
    actor_method_names = json.loads(ensure_str(actor_method_names))

    actor_class = None
    try:
        with self.lock:
            actor_class = pickle.loads(pickled_class)
    except Exception:
        # BUG FIX: the original mixed %-style and str.format --
        # "...%s.".format(class_name) logged a literal "%s" and never
        # included the class name. Pass the argument to the logger
        # (lazy %-formatting) instead.
        logger.exception("Failed to load actor class %s.", class_name)
        # The actor class failed to be unpickled, create a fake actor
        # class instead (just to produce error messages and to prevent
        # the driver from hanging).
        actor_class = self._create_fake_actor_class(
            class_name, actor_method_names)
        # If an exception was thrown when the actor was imported, we record
        # the traceback and notify the scheduler of the failure.
        traceback_str = ray.utils.format_error_message(
            traceback.format_exc())
        # Log the error message.
        push_error_to_driver(
            self._worker,
            ray_constants.REGISTER_ACTOR_PUSH_ERROR,
            "Failed to unpickle actor class '{}' for actor ID {}. "
            "Traceback:\n{}".format(class_name,
                                    self._worker.actor_id.hex(),
                                    traceback_str),
            job_id=job_id)
        # TODO(rkn): In the future, it might make sense to have the worker
        # exit here. However, currently that would lead to hanging if
        # someone calls ray.get on a method invoked on the actor.

    # The below line is necessary. Because in the driver process,
    # if the function is defined in the file where the python script
    # was started from, its module is `__main__`.
    # However in the worker process, the `__main__` module is a
    # different module, which is `default_worker.py`
    actor_class.__module__ = module_name
    return actor_class
def _load_actor_class_from_gcs(self, driver_id, function_descriptor):
    """Load actor class from GCS.

    Args:
        driver_id: ID of the driver that exported the actor class.
        function_descriptor: Descriptor whose function_id identifies the
            exported class.

    Returns:
        The unpickled actor class, or a fake stand-in class (that raises
        on use) if unpickling failed.
    """
    key = (b"ActorClass:" + driver_id.binary() + b":" +
           function_descriptor.function_id.binary())
    # Wait for the actor class key to have been imported by the
    # import thread. TODO(rkn): It shouldn't be possible to end
    # up in an infinite loop here, but we should push an error to
    # the driver if too much time is spent here.
    while key not in self.imported_actor_classes:
        time.sleep(0.001)

    # Fetch raw data from GCS.
    (driver_id_str, class_name, module, pickled_class,
     actor_method_names) = self._worker.redis_client.hmget(
         key, [
             "driver_id", "class_name", "module", "class",
             "actor_method_names"
         ])

    class_name = ensure_str(class_name)
    module_name = ensure_str(module)
    driver_id = ray.DriverID(driver_id_str)
    actor_method_names = json.loads(ensure_str(actor_method_names))

    actor_class = None
    try:
        with self._worker.lock:
            actor_class = pickle.loads(pickled_class)
    except Exception:
        # BUG FIX: the original mixed %-style and str.format --
        # "...%s.".format(class_name) logged a literal "%s" and never
        # included the class name. Pass the argument to the logger
        # (lazy %-formatting) instead.
        logger.exception("Failed to load actor class %s.", class_name)
        # The actor class failed to be unpickled, create a fake actor
        # class instead (just to produce error messages and to prevent
        # the driver from hanging).
        actor_class = self._create_fake_actor_class(
            class_name, actor_method_names)
        # If an exception was thrown when the actor was imported, we record
        # the traceback and notify the scheduler of the failure.
        traceback_str = ray.utils.format_error_message(
            traceback.format_exc())
        # Log the error message.
        push_error_to_driver(
            self._worker,
            ray_constants.REGISTER_ACTOR_PUSH_ERROR,
            "Failed to unpickle actor class '{}' for actor ID {}. "
            "Traceback:\n{}".format(class_name,
                                    self._worker.actor_id.hex(),
                                    traceback_str),
            driver_id)
        # TODO(rkn): In the future, it might make sense to have the worker
        # exit here. However, currently that would lead to hanging if
        # someone calls ray.get on a method invoked on the actor.

    # The below line is necessary. Because in the driver process,
    # if the function is defined in the file where the python script
    # was started from, its module is `__main__`.
    # However in the worker process, the `__main__` module is a
    # different module, which is `default_worker.py`
    actor_class.__module__ = module_name
    return actor_class
def fetch_and_register_remote_function(self, key):
    """Import a remote function.

    Fetches the serialized function and its metadata from Redis under
    `key`, registers a raising placeholder first, then replaces it with
    the real function once unpickling succeeds.
    """
    (driver_id_str, function_id_str, function_name, serialized_function,
     num_return_vals, module, resources,
     max_calls) = self._worker.redis_client.hmget(key, [
         "driver_id", "function_id", "name", "function", "num_return_vals",
         "module", "resources", "max_calls"
     ])
    function_id = ray.FunctionID(function_id_str)
    driver_id = ray.DriverID(driver_id_str)
    function_name = decode(function_name)
    max_calls = int(max_calls)
    module = decode(module)

    # This is a placeholder in case the function can't be unpickled. This
    # will be overwritten if the function is successfully registered.
    # BUG FIX: accept arbitrary arguments -- tasks invoke this with the
    # original call's arguments, and a zero-arg signature produced a
    # confusing TypeError instead of this message.
    def f(*args, **kwargs):
        raise Exception("This function was not imported properly.")

    self._function_execution_info[driver_id][function_id] = (
        FunctionExecutionInfo(
            function=f, function_name=function_name, max_calls=max_calls))
    self._num_task_executions[driver_id][function_id] = 0

    try:
        function = pickle.loads(serialized_function)
    except Exception:
        # If an exception was thrown when the remote function was imported,
        # we record the traceback and notify the scheduler of the failure.
        traceback_str = format_error_message(traceback.format_exc())
        # Log the error message.
        push_error_to_driver(
            self._worker,
            ray_constants.REGISTER_REMOTE_FUNCTION_PUSH_ERROR,
            traceback_str,
            driver_id=driver_id,
            data={
                "function_id": function_id.binary(),
                "function_name": function_name
            })
    else:
        # The below line is necessary. Because in the driver process,
        # if the function is defined in the file where the python script
        # was started from, its module is `__main__`.
        # However in the worker process, the `__main__` module is a
        # different module, which is `default_worker.py`
        function.__module__ = module
        self._function_execution_info[driver_id][function_id] = (
            FunctionExecutionInfo(
                function=function,
                function_name=function_name,
                max_calls=max_calls))
        # Add the function to the function table.
        self._worker.redis_client.rpush(
            b"FunctionTable:" + function_id.binary(),
            self._worker.worker_id)
def fetch_and_register_remote_function(self, key):
    """Import a remote function.

    Fetches the serialized function and its metadata from Redis under
    `key`, registers a raising placeholder first, then replaces it with
    the real function once unpickling succeeds.
    """
    (driver_id_str, function_id_str, function_name, serialized_function,
     num_return_vals, module, resources,
     max_calls) = self._worker.redis_client.hmget(key, [
         "driver_id", "function_id", "name", "function", "num_return_vals",
         "module", "resources", "max_calls"
     ])
    function_id = ray.FunctionID(function_id_str)
    driver_id = ray.DriverID(driver_id_str)
    function_name = decode(function_name)
    max_calls = int(max_calls)
    module = decode(module)

    # This is a placeholder in case the function can't be unpickled. This
    # will be overwritten if the function is successfully registered.
    # BUG FIX: accept arbitrary arguments -- tasks invoke this with the
    # original call's arguments, and a zero-arg signature produced a
    # confusing TypeError instead of this message.
    def f(*args, **kwargs):
        raise Exception("This function was not imported properly.")

    self._function_execution_info[driver_id][function_id] = (
        FunctionExecutionInfo(
            function=f, function_name=function_name, max_calls=max_calls))
    self._num_task_executions[driver_id][function_id] = 0

    try:
        function = pickle.loads(serialized_function)
    except Exception:
        # If an exception was thrown when the remote function was imported,
        # we record the traceback and notify the scheduler of the failure.
        traceback_str = format_error_message(traceback.format_exc())
        # Log the error message.
        push_error_to_driver(
            self._worker,
            ray_constants.REGISTER_REMOTE_FUNCTION_PUSH_ERROR,
            "Failed to unpickle the remote function '{}' with function ID "
            "{}. Traceback:\n{}".format(function_name, function_id.hex(),
                                        traceback_str),
            driver_id=driver_id)
    else:
        # The below line is necessary. Because in the driver process,
        # if the function is defined in the file where the python script
        # was started from, its module is `__main__`.
        # However in the worker process, the `__main__` module is a
        # different module, which is `default_worker.py`
        function.__module__ = module
        self._function_execution_info[driver_id][function_id] = (
            FunctionExecutionInfo(
                function=function,
                function_name=function_name,
                max_calls=max_calls))
        # Add the function to the function table.
        self._worker.redis_client.rpush(
            b"FunctionTable:" + function_id.binary(),
            self._worker.worker_id)
def fetch_and_register_remote_function(self, key):
    """Import a remote function.

    Fetches the serialized function and its metadata from Redis under
    `key`, registers a raising placeholder first, then replaces it with
    the real function once unpickling succeeds.
    """
    from ray.worker import FunctionExecutionInfo
    (driver_id, function_id_str, function_name, serialized_function,
     num_return_vals, module, resources,
     max_calls) = self.redis_client.hmget(key, [
         "driver_id", "function_id", "name", "function", "num_return_vals",
         "module", "resources", "max_calls"
     ])
    function_id = ray.ObjectID(function_id_str)
    function_name = utils.decode(function_name)
    max_calls = int(max_calls)
    module = utils.decode(module)

    # This is a placeholder in case the function can't be unpickled. This
    # will be overwritten if the function is successfully registered.
    # BUG FIX: accept arbitrary arguments -- tasks invoke this with the
    # original call's arguments, and a zero-arg signature produced a
    # confusing TypeError instead of this message.
    def f(*args, **kwargs):
        raise Exception("This function was not imported properly.")

    self.worker.function_execution_info[driver_id][function_id.id()] = (
        FunctionExecutionInfo(
            function=f, function_name=function_name, max_calls=max_calls))
    self.worker.num_task_executions[driver_id][function_id.id()] = 0

    try:
        function = pickle.loads(serialized_function)
    except Exception:
        # If an exception was thrown when the remote function was imported,
        # we record the traceback and notify the scheduler of the failure.
        traceback_str = utils.format_error_message(traceback.format_exc())
        # Log the error message.
        utils.push_error_to_driver(
            self.worker,
            ray_constants.REGISTER_REMOTE_FUNCTION_PUSH_ERROR,
            traceback_str,
            driver_id=driver_id,
            data={
                "function_id": function_id.id(),
                "function_name": function_name
            })
    else:
        # TODO(rkn): Why is the below line necessary?
        function.__module__ = module
        self.worker.function_execution_info[driver_id][
            function_id.id()] = (FunctionExecutionInfo(
                function=function,
                function_name=function_name,
                max_calls=max_calls))
        # Add the function to the function table.
        self.redis_client.rpush(b"FunctionTable:" + function_id.id(),
                                self.worker.worker_id)
def fetch_and_register_actor(actor_class_key, resources, worker):
    """Import an actor.

    This will be called by the worker's import thread when the worker
    receives the actor_class export, assuming that the worker is an actor
    for that class.

    Args:
        actor_class_key: The key in Redis to use to fetch the actor.
        resources: The resources required for this actor's lifetime.
        worker: The worker to use.
    """
    actor_id_str = worker.actor_id
    (driver_id, class_id, class_name, module, pickled_class,
     checkpoint_interval, actor_method_names,
     actor_method_num_return_vals) = worker.redis_client.hmget(
         actor_class_key, [
             "driver_id", "class_id", "class_name", "module", "class",
             "checkpoint_interval", "actor_method_names",
             "actor_method_num_return_vals"
         ])

    # Redis returns raw bytes; decode the textual fields.
    actor_name = class_name.decode("ascii")
    module = module.decode("ascii")
    checkpoint_interval = int(checkpoint_interval)
    actor_method_names = json.loads(actor_method_names.decode("ascii"))
    actor_method_num_return_vals = json.loads(
        actor_method_num_return_vals.decode("ascii"))

    # Create a temporary actor with some temporary methods so that if the
    # actor fails to be unpickled, the temporary actor can be used (just to
    # produce error messages and to prevent the driver from hanging).
    class TemporaryActor(object):
        pass

    worker.actors[actor_id_str] = TemporaryActor()
    worker.actor_checkpoint_interval = checkpoint_interval

    def temporary_actor_method(*xs):
        raise Exception("The actor with name {} failed to be imported, and so "
                        "cannot execute this method".format(actor_name))

    # Register the actor method signatures.
    register_actor_signatures(worker, driver_id, class_id, class_name,
                              actor_method_names,
                              actor_method_num_return_vals)
    # Register the actor method executors. These temporary executors are
    # installed first so a method call that arrives before (or instead of)
    # a successful unpickle raises a clear error.
    for actor_method_name in actor_method_names:
        function_id = compute_actor_method_function_id(
            class_name, actor_method_name).id()
        temporary_executor = make_actor_method_executor(
            worker,
            actor_method_name,
            temporary_actor_method,
            actor_imported=False)
        worker.functions[driver_id][function_id] = (actor_method_name,
                                                    temporary_executor)
        worker.num_task_executions[driver_id][function_id] = 0

    try:
        unpickled_class = pickle.loads(pickled_class)
        worker.actor_class = unpickled_class
    except Exception:
        # If an exception was thrown when the actor was imported, we record
        # the traceback and notify the scheduler of the failure.
        traceback_str = ray.utils.format_error_message(traceback.format_exc())
        # Log the error message.
        push_error_to_driver(
            worker.redis_client,
            "register_actor_signatures",
            traceback_str,
            driver_id,
            data={"actor_id": actor_id_str})
        # TODO(rkn): In the future, it might make sense to have the worker
        # exit here. However, currently that would lead to hanging if
        # someone calls ray.get on a method invoked on the actor.
    else:
        # TODO(pcm): Why is the below line necessary?
        unpickled_class.__module__ = module
        # Replace the temporary actor with a real (uninitialized) instance.
        worker.actors[actor_id_str] = unpickled_class.__new__(unpickled_class)

        def pred(x):
            return (inspect.isfunction(x) or inspect.ismethod(x)
                    or is_cython(x))

        actor_methods = inspect.getmembers(unpickled_class, predicate=pred)
        # Swap the temporary executors for the real method executors.
        for actor_method_name, actor_method in actor_methods:
            function_id = compute_actor_method_function_id(
                class_name, actor_method_name).id()
            executor = make_actor_method_executor(
                worker, actor_method_name, actor_method, actor_imported=True)
            worker.functions[driver_id][function_id] = (actor_method_name,
                                                        executor)
def fetch_and_register_actor(self, actor_class_key):
    """Import an actor.

    This will be called by the worker's import thread when the worker
    receives the actor_class export, assuming that the worker is an actor
    for that class.

    Args:
        actor_class_key: The key in Redis to use to fetch the actor.
    """
    actor_id = self._worker.actor_id
    (driver_id_str, class_name, module, pickled_class,
     actor_method_names) = self._worker.redis_client.hmget(
         actor_class_key, [
             "driver_id", "class_name", "module", "class",
             "actor_method_names"
         ])

    class_name = decode(class_name)
    module = decode(module)
    driver_id = ray.DriverID(driver_id_str)
    actor_method_names = json.loads(decode(actor_method_names))

    # In Python 2, json loads strings as unicode, so convert them back to
    # strings.
    if sys.version_info < (3, 0):
        actor_method_names = [
            method_name.encode("ascii")
            for method_name in actor_method_names
        ]

    # Create a temporary actor with some temporary methods so that if
    # the actor fails to be unpickled, the temporary actor can be used
    # (just to produce error messages and to prevent the driver from
    # hanging).
    class TemporaryActor(object):
        pass

    self._worker.actors[actor_id] = TemporaryActor()

    def temporary_actor_method(*xs):
        raise Exception(
            "The actor with name {} failed to be imported, "
            "and so cannot execute this method".format(class_name))

    # Register the actor method executors. These temporary executors are
    # installed first so a method call that arrives before (or instead of)
    # a successful unpickle raises a clear error.
    for actor_method_name in actor_method_names:
        function_descriptor = FunctionDescriptor(module, actor_method_name,
                                                 class_name)
        function_id = function_descriptor.function_id
        temporary_executor = self._make_actor_method_executor(
            actor_method_name, temporary_actor_method, actor_imported=False)
        self._function_execution_info[driver_id][function_id] = (
            FunctionExecutionInfo(
                function=temporary_executor,
                function_name=actor_method_name,
                max_calls=0))
        self._num_task_executions[driver_id][function_id] = 0

    try:
        unpickled_class = pickle.loads(pickled_class)
        self._worker.actor_class = unpickled_class
    except Exception:
        # If an exception was thrown when the actor was imported, we record
        # the traceback and notify the scheduler of the failure.
        traceback_str = ray.utils.format_error_message(
            traceback.format_exc())
        # Log the error message.
        push_error_to_driver(
            self._worker, ray_constants.REGISTER_ACTOR_PUSH_ERROR,
            "Failed to unpickle actor class '{}' for actor ID {}. "
            "Traceback:\n{}".format(class_name, actor_id.hex(),
                                    traceback_str), driver_id)
        # TODO(rkn): In the future, it might make sense to have the worker
        # exit here. However, currently that would lead to hanging if
        # someone calls ray.get on a method invoked on the actor.
    else:
        # TODO(pcm): Why is the below line necessary?
        unpickled_class.__module__ = module
        # Replace the temporary actor with a real (uninitialized) instance.
        self._worker.actors[actor_id] = unpickled_class.__new__(
            unpickled_class)

        actor_methods = inspect.getmembers(
            unpickled_class, predicate=is_function_or_method)
        # Swap the temporary executors for the real method executors.
        for actor_method_name, actor_method in actor_methods:
            function_descriptor = FunctionDescriptor(
                module, actor_method_name, class_name)
            function_id = function_descriptor.function_id
            executor = self._make_actor_method_executor(
                actor_method_name, actor_method, actor_imported=True)
            self._function_execution_info[driver_id][function_id] = (
                FunctionExecutionInfo(
                    function=executor,
                    function_name=actor_method_name,
                    max_calls=0))
def fetch_and_register_actor(self, actor_class_key):
    """Import an actor.

    This will be called by the worker's import thread when the worker
    receives the actor_class export, assuming that the worker is an actor
    for that class.

    Args:
        actor_class_key: The key in Redis to use to fetch the actor.
        worker: The worker to use.
    """
    actor_id_str = self._worker.actor_id
    (driver_id, class_id, class_name, module, pickled_class,
     checkpoint_interval,
     actor_method_names) = self._worker.redis_client.hmget(
         actor_class_key, [
             "driver_id", "class_id", "class_name", "module", "class",
             "checkpoint_interval", "actor_method_names"
         ])

    class_name = decode(class_name)
    module = decode(module)
    checkpoint_interval = int(checkpoint_interval)
    actor_method_names = json.loads(decode(actor_method_names))

    # Create a temporary actor with some temporary methods so that if
    # the actor fails to be unpickled, the temporary actor can be used
    # (just to produce error messages and to prevent the driver from
    # hanging).
    class TemporaryActor(object):
        pass

    self._worker.actors[actor_id_str] = TemporaryActor()
    self._worker.actor_checkpoint_interval = checkpoint_interval

    def temporary_actor_method(*xs):
        raise Exception(
            "The actor with name {} failed to be imported, "
            "and so cannot execute this method".format(class_name))

    # Register the actor method executors. These temporary executors are
    # installed first so a method call that arrives before (or instead of)
    # a successful unpickle raises a clear error.
    for actor_method_name in actor_method_names:
        function_id = (
            FunctionActorManager.compute_actor_method_function_id(
                class_name, actor_method_name).id())
        temporary_executor = self._make_actor_method_executor(
            actor_method_name, temporary_actor_method, actor_imported=False)
        self._function_execution_info[driver_id][function_id] = (
            FunctionExecutionInfo(
                function=temporary_executor,
                function_name=actor_method_name,
                max_calls=0))
        self._num_task_executions[driver_id][function_id] = 0

    try:
        unpickled_class = pickle.loads(pickled_class)
        self._worker.actor_class = unpickled_class
    except Exception:
        # If an exception was thrown when the actor was imported, we record
        # the traceback and notify the scheduler of the failure.
        traceback_str = ray.utils.format_error_message(
            traceback.format_exc())
        # Log the error message.
        push_error_to_driver(
            self._worker,
            ray_constants.REGISTER_ACTOR_PUSH_ERROR,
            traceback_str,
            driver_id,
            data={"actor_id": actor_id_str})
        # TODO(rkn): In the future, it might make sense to have the worker
        # exit here. However, currently that would lead to hanging if
        # someone calls ray.get on a method invoked on the actor.
    else:
        # TODO(pcm): Why is the below line necessary?
        unpickled_class.__module__ = module
        # Replace the temporary actor with a real (uninitialized) instance.
        self._worker.actors[actor_id_str] = unpickled_class.__new__(
            unpickled_class)

        actor_methods = inspect.getmembers(
            unpickled_class, predicate=is_function_or_method)
        # Swap the temporary executors for the real method executors.
        for actor_method_name, actor_method in actor_methods:
            function_id = (
                FunctionActorManager.compute_actor_method_function_id(
                    class_name, actor_method_name).id())
            executor = self._make_actor_method_executor(
                actor_method_name, actor_method, actor_imported=True)
            self._function_execution_info[driver_id][function_id] = (
                FunctionExecutionInfo(
                    function=executor,
                    function_name=actor_method_name,
                    max_calls=0))