def trial_info(self, trial):
    """Build a serializable summary of ``trial``.

    The trial's config and a copy of its last result are pickled with
    cloudpickle and hex-encoded. When ``trial.last_result`` is falsy, the
    encoded result is that of ``None``.

    Args:
        trial: The trial to summarize.

    Returns:
        dict with the keys ``id``, ``trainable_name``, ``config``,
        ``status`` and ``result``.
    """
    # Copy so that the pickled result is decoupled from the live trial.
    last_result = trial.last_result.copy() if trial.last_result else None
    return {
        "id": trial.trial_id,
        "trainable_name": trial.trainable_name,
        "config": binary_to_hex(cloudpickle.dumps(trial.config)),
        "status": trial.status,
        "result": binary_to_hex(cloudpickle.dumps(last_result)),
    }
def __getstate__(self):
    """Memento generator for Trial.

    Sets RUNNING trials to PENDING, and flushes the result logger.
    Note this can only occur if the trial holds a DISK checkpoint.

    Returns:
        A deep copy of the instance ``__dict__`` in which ``resources`` is
        converted via ``resources_to_json``, selected fields are
        cloudpickled and hex-encoded, ``runner``/``result_logger`` are
        cleared, and ``__logger_started__`` records whether a result
        logger was present.
    """
    assert self._checkpoint.storage == Checkpoint.DISK, (
        "Checkpoint must not be in-memory.")

    snapshot = self.__dict__.copy()
    snapshot["resources"] = resources_to_json(self.resources)

    # These fields are stored as hex strings of their cloudpickle dumps.
    hex_encoded_fields = (
        ("_checkpoint", self._checkpoint),
        ("config", self.config),
        ("custom_loggers", self.custom_loggers),
        ("sync_function", self.sync_function),
        ("last_result", self.last_result),
    )
    for field_name, field_value in hex_encoded_fields:
        snapshot[field_name] = binary_to_hex(cloudpickle.dumps(field_value))

    # Live handles are not part of the saved state.
    snapshot["runner"] = None
    snapshot["result_logger"] = None

    if self.status == Trial.RUNNING:
        snapshot["status"] = Trial.PENDING

    logger_present = bool(self.result_logger)
    if logger_present:
        self.result_logger.flush()
    snapshot["__logger_started__"] = logger_present

    return copy.deepcopy(snapshot)
def register(self, category, key, value):
    """Queue a pickled ``value`` under ``(category, key)`` and flush if possible.

    Args:
        category: Must be one of ``KNOWN_CATEGORIES``.
        key: Key of the entry within the category.
        value: Object to pickle and store.

    Raises:
        TuneError: If ``category`` is not in ``KNOWN_CATEGORIES``.
    """
    if category not in KNOWN_CATEGORIES:
        from ray.tune import TuneError
        message = "Unknown category {} not among {}".format(
            category, KNOWN_CATEGORIES)
        raise TuneError(message)

    pickled_value = pickle.dumps(value)
    self._to_flush[(category, key)] = pickled_value

    # Pending values are only flushed once the internal KV is initialized.
    if not _internal_kv_initialized():
        return
    self.flush_values()
def set_flushing_policy(flushing_policy):
    """Serialize this policy for Monitor to pick up.

    Args:
        flushing_policy: Policy object; it is pickled and written to Redis
            under the key ``gcs_flushing_policy``.

    Raises:
        Exception: If ``RAY_USE_NEW_GCS`` is not set in the environment.
    """
    if os.environ.get("RAY_USE_NEW_GCS") is None:
        raise Exception(
            "set_flushing_policy() is only available when environment "
            "variable RAY_USE_NEW_GCS is present at both compile and run time."
        )

    worker = ray.worker.global_worker
    worker.check_connected()
    client = worker.redis_client
    payload = pickle.dumps(flushing_policy)
    client.set("gcs_flushing_policy", payload)
def _get_object(self, request, client_id: str, context=None):
    """Resolve a single client object reference into a ``GetResponse``.

    Args:
        request: Request carrying the reference ``id`` and a ``timeout``.
        client_id: Client whose reference table is consulted.
        context: Unused here.

    Returns:
        ``GetResponse(valid=False)`` when the id is unknown for the client,
        an invalid response carrying the cloudpickled exception when
        ``ray.get`` fails, otherwise a valid response with the serialized
        object.
    """
    refs_for_client = self.object_refs[client_id]
    if request.id not in refs_for_client:
        return ray_client_pb2.GetResponse(valid=False)

    objectref = refs_for_client[request.id]
    logger.debug("get: %s" % objectref)
    try:
        with disable_client_hook():
            fetched = ray.get(objectref, timeout=request.timeout)
    except Exception as e:
        return ray_client_pb2.GetResponse(
            valid=False, error=cloudpickle.dumps(e))
    else:
        # Serialization happens outside the try, so its errors propagate.
        payload = dumps_from_server(fetched, client_id, self)
        return ray_client_pb2.GetResponse(valid=True, data=payload)
def register(self, category, key, value):
    """Registers the value with the global registry.

    The value is pickled and queued under ``(category, key)``; queued
    values are flushed right away when the internal KV is initialized.

    Args:
        category: Must be one of ``KNOWN_CATEGORIES``.
        key: Key of the entry within the category.
        value: Object to pickle and register.

    Raises:
        TuneError: If ``category`` is not a known category.
        PicklingError: If ``value`` cannot be pickled.
    """
    if category not in KNOWN_CATEGORIES:
        from ray.tune import TuneError
        raise TuneError(
            "Unknown category {} not among {}".format(
                category, KNOWN_CATEGORIES))

    entry = (category, key)
    self._to_flush[entry] = pickle.dumps(value)

    kv_ready = _internal_kv_initialized()
    if kv_ready:
        self.flush_values()
def _try_to_compute_deterministic_class_id(cls, depth=5):
    """Attempt to produce a deterministic class ID for a given class.

    The goal here is for the class ID to be the same when this is run on
    different worker processes. Pickling, loading, and pickling again seems to
    produce more consistent results than simply pickling. This is a bit crazy
    and could cause problems, in which case we should revert it and figure out
    something better.

    Args:
        cls: The class to produce an ID for.
        depth: The number of times to repeatedly try to load and dump the
            string while trying to reach a fixed point. A value of 0 skips
            the fixed-point search and hashes the first pickle.

    Returns:
        A class ID for this class. We attempt to make the class ID the same
        when this function is run on different workers, but that is not
        guaranteed.

    Raises:
        Exception: This could raise an exception if cloudpickle raises an
            exception.
    """
    # Pickling, loading, and pickling again seems to produce more consistent
    # results than simply pickling, so iterate until the bytes stop changing.
    class_id = pickle.dumps(cls)
    for _ in range(depth):
        new_class_id = pickle.dumps(pickle.loads(class_id))
        if new_class_id == class_id:
            # We appear to have reached a fix point, so use this as the ID.
            return hashlib.sha1(new_class_id).digest()
        class_id = new_class_id

    # We have not reached a fixed point, so we may end up with a different
    # class ID for this custom class on each worker, which could lead to the
    # same class definition being exported many many times.
    logger.warning(
        f"WARNING: Could not produce a deterministic class ID for class {cls}")
    # `class_id` always holds the most recent pickle, including when the loop
    # body never ran (depth <= 0), where `new_class_id` would be unbound.
    return hashlib.sha1(class_id).digest()
def _get_object(self, request: ray_client_pb2.GetRequest, client_id: str):
    """Resolve all ids in ``request`` and return them in one ``GetResponse``.

    Args:
        request: Request carrying ``ids`` and a ``timeout``.
        client_id: Client whose reference table is consulted.

    Returns:
        An invalid response with a cloudpickled ``ValueError`` for the first
        id that has no (truthy) reference, an invalid response with the
        cloudpickled exception when ``ray.get`` fails, otherwise a valid
        response holding the serialized list of objects.
    """
    objectrefs = []
    for rid in request.ids:
        ref = self.object_refs[client_id].get(rid, None)
        if not ref:
            not_found = ValueError(
                f"ClientObjectRef {rid} is not found for client "
                f"{client_id}")
            return ray_client_pb2.GetResponse(
                valid=False, error=cloudpickle.dumps(not_found))
        objectrefs.append(ref)

    try:
        logger.debug("get: %s" % objectrefs)
        with disable_client_hook():
            items = ray.get(objectrefs, timeout=request.timeout)
    except Exception as e:
        failure = cloudpickle.dumps(e)
        return ray_client_pb2.GetResponse(valid=False, error=failure)

    payload = dumps_from_server(items, client_id, self)
    return ray_client_pb2.GetResponse(valid=True, data=payload)
def _get_object(self, request: ray_client_pb2.GetRequest, client_id: str):
    """Yield the requested objects as a sequence of chunked ``GetResponse``s.

    Args:
        request: Request carrying ``ids``, a ``timeout`` and the
            ``start_chunk_id`` from which chunks are emitted.
        client_id: Client whose reference table is consulted.

    Yields:
        A single invalid response (with a cloudpickled error) when an id is
        unknown or ``ray.get`` fails; otherwise one valid response per chunk
        of at most ``OBJECT_TRANSFER_CHUNK_SIZE`` units of the serialized
        data, each tagged with ``chunk_id``, ``total_chunks`` and
        ``total_size``.
    """
    objectrefs = []
    for rid in request.ids:
        ref = self.object_refs[client_id].get(rid, None)
        if not ref:
            not_found = ValueError(
                f"ClientObjectRef {rid} is not found for client "
                f"{client_id}"
            )
            yield ray_client_pb2.GetResponse(
                valid=False, error=cloudpickle.dumps(not_found)
            )
            return
        objectrefs.append(ref)

    try:
        logger.debug("get: %s" % objectrefs)
        with disable_client_hook():
            items = ray.get(objectrefs, timeout=request.timeout)
    except Exception as e:
        yield ray_client_pb2.GetResponse(valid=False, error=cloudpickle.dumps(e))
        return

    serialized = dumps_from_server(items, client_id, self)
    total_size = len(serialized)
    assert total_size > 0, "Serialized object cannot be zero bytes"
    total_chunks = math.ceil(total_size / OBJECT_TRANSFER_CHUNK_SIZE)

    for chunk_id in range(request.start_chunk_id, total_chunks):
        offset = chunk_id * OBJECT_TRANSFER_CHUNK_SIZE
        # Slicing clamps at the end of the data, so the last chunk may be
        # shorter than OBJECT_TRANSFER_CHUNK_SIZE.
        piece = serialized[offset:offset + OBJECT_TRANSFER_CHUNK_SIZE]
        yield ray_client_pb2.GetResponse(
            valid=True,
            data=piece,
            chunk_id=chunk_id,
            total_chunks=total_chunks,
            total_size=total_size,
        )
def calculate_identifier(obj: Any) -> str:
    """Calculate a url-safe identifier for an object.

    The object is serialized with cloudpickle, hashed with SHA-256, and the
    digest is encoded as url-safe base64 text.

    Args:
        obj: The object to identify.

    Returns:
        The url-safe base64 encoding of the SHA-256 digest, as ASCII text.
    """
    # TODO (Alex): Ideally we should use the existing ObjectRef serializer to
    # avoid duplicate serialization passes and support nested object refs.
    digest = hashlib.sha256(cloudpickle.dumps(obj)).digest()
    return base64.urlsafe_b64encode(digest).decode("ascii")
def _checkpoint(self):
    """Checkpoint internal state and write it to the KV store.

    The state tuple is pickled and stored under the key ``"checkpoint"``.
    Afterwards the process exits with probability
    ``_CRASH_AFTER_CHECKPOINT_PROBABILITY``.
    """
    logger.debug("Writing checkpoint")
    started_at = time.time()

    state = (
        self.routes,
        self.backends,
        self.traffic_policies,
        self.replicas,
        self.replicas_to_start,
        self.replicas_to_stop,
    )
    self.kv_store_client.put("checkpoint", pickle.dumps(state))

    elapsed = time.time() - started_at
    logger.debug("Wrote checkpoint in {:.2f}".format(elapsed))

    should_crash = random.random() < _CRASH_AFTER_CHECKPOINT_PROBABILITY
    if should_crash:
        logger.warning("Intentionally crashing after checkpoint")
        os._exit(0)
def send_get_response(result: Any) -> None:
    """Pushes a GetResponse to the main DataPath loop to send to the
    client. This is called when the object is ready on the server side.

    Any exception raised while serializing ``result`` or building the
    response is cloudpickled into an invalid GetResponse instead.
    """
    try:
        payload = dumps_from_server(result, client_id, self)
        reply = ray_client_pb2.GetResponse(valid=True, data=payload)
    except Exception as exc:
        reply = ray_client_pb2.GetResponse(
            valid=False, error=cloudpickle.dumps(exc))

    result_queue.put(
        ray_client_pb2.DataResponse(get=reply, req_id=req_id))
def _hash(obj: Any) -> bytes:
    """Calculate the SHA-256 digest of an object's cloudpickle serialization.

    Args:
        obj: The object to hash.

    Returns:
        The raw digest bytes.
    """
    # TODO (Alex): Ideally we shouldn't let ray serialize obj to begin with.
    # (1) It would save us an unnecessary serialization/deserialization. (2)
    # Cloudpickle isn't always stable, so for some objects (i.e.
    # functions) this could produce inconsistent results.
    # TODO (Alex): We should handle the nested ObjectRef case. This naive
    # cloudpickle.dumps will return a different hash when run on the
    # recovered version of an object.
    return hashlib.sha256(cloudpickle.dumps(obj)).digest()
def serialize(self, value):
    """Serialize an object.

    Args:
        value: The value to serialize.

    Returns:
        A ``RawSerializedObject`` for ``bytes`` input, a
        ``Pickle5SerializedObject`` when the worker uses pickle, otherwise
        an ``ArrowSerializedObject``.
    """
    if isinstance(value, bytes):
        # If the object is a byte array, skip serializing it and
        # use a special metadata to indicate it's raw binary. So
        # that this object can also be read by Java.
        return RawSerializedObject(value)

    if not self.worker.use_pickle:
        try:
            arrow_payload = self._store_and_register_pyarrow(value)
        except TypeError:
            # TypeError can happen because one of the members of the object
            # may not be serializable for cloudpickle. So we need
            # these extra fallbacks here to start from the beginning.
            # Hopefully the object could have a `__reduce__` method.
            self.register_custom_serializer(type(value), use_pickle=True)
            logger.warning("WARNING: Serializing the class {} failed, "
                           "falling back to cloudpickle.".format(
                               type(value)))
            arrow_payload = self._store_and_register_pyarrow(value)
        return ArrowSerializedObject(arrow_payload)

    buffer_writer = Pickle5Writer()
    if ray.cloudpickle.FAST_CLOUDPICKLE_USED:
        # Out-of-band buffers are handed to the writer's callback.
        in_band = pickle.dumps(
            value, protocol=5, buffer_callback=buffer_writer.buffer_callback)
    else:
        in_band = pickle.dumps(value)
    return Pickle5SerializedObject(in_band, buffer_writer)
def _real_size(self, item: Any) -> int:
    """Return the serialized size of ``item`` in bytes.

    Args:
        item: The object to measure.

    Returns:
        In client mode, the length of the Ray cloudpickle dump; otherwise
        ``total_bytes`` of the object as serialized by the worker's
        serialization context.
    """
    global _ray_initialized

    # In client mode, fallback to using Ray cloudpickle instead of the
    # real serializer.
    if ray.util.client.ray.is_connected():
        return len(cloudpickle.dumps(item))

    # We're using an internal Ray API, and have to ensure it's initialized
    # by calling a public API.
    if not _ray_initialized:
        _ray_initialized = True
        ray.put(None)

    context = ray.worker.global_worker.get_serialization_context()
    return context.serialize(item).total_bytes
def test_replica_config_lazy_deserialization(self):
    """Check that ReplicaConfig keeps serialized fields until first access."""

    def f():
        return "Check this out!"

    f_serialized = cloudpickle.dumps(f)
    args_serialized = cloudpickle.dumps(())
    kwargs_serialized = cloudpickle.dumps({})
    config = ReplicaConfig(
        "f", f_serialized, args_serialized, kwargs_serialized, {}
    )

    # Before any accessor is used, only the serialized forms are populated.
    assert config.serialized_deployment_def == f_serialized
    assert config._deployment_def is None

    assert config.serialized_init_args == cloudpickle.dumps(tuple())
    assert config._init_args is None

    assert config.serialized_init_kwargs == cloudpickle.dumps(dict())
    assert config._init_kwargs is None

    assert isinstance(config.ray_actor_options, dict)
    assert isinstance(config.resource_dict, dict)

    # Accessing the public properties yields the deserialized values.
    assert config.deployment_def() == "Check this out!"
    assert config.init_args == tuple()
    assert config.init_kwargs == dict()
def export(self, remote_function):
    """Pickle a remote function and export it to the GCS internal KV store.

    Nothing is exported when code is loaded locally and the function can be
    found there, or when the function's key already exists in the store.

    Args:
        remote_function: the RemoteFunction object.
    """
    worker = self._worker
    if worker.load_code_from_local:
        local_descriptor = remote_function._function_descriptor
        module_name = local_descriptor.module_name
        function_name = local_descriptor.function_name
        # If the function is dynamic, we still export it to GCS
        # even if load_code_from_local is set True.
        found_locally = self.load_function_or_class_from_local(
            module_name, function_name)
        if found_locally is not None:
            return

    function = remote_function._function
    pickled_function = remote_function._pickled_function

    check_oversized_function(
        pickled_function,
        remote_function._function_name,
        "remote function",
        worker,
    )

    key = make_function_table_key(
        b"RemoteFunction",
        worker.current_job_id,
        remote_function._function_descriptor.function_id.binary(),
    )
    if worker.gcs_client.internal_kv_exists(key, KV_NAMESPACE_FUNCTION_TABLE):
        return

    function_info = {
        "job_id": worker.current_job_id.binary(),
        "function_id": remote_function._function_descriptor.function_id.binary(),  # noqa: E501
        "function_name": remote_function._function_name,
        "module": function.__module__,
        "function": pickled_function,
        "collision_identifier": self.compute_collision_identifier(function),
        "max_calls": remote_function._max_calls,
    }
    worker.gcs_client.internal_kv_put(
        key, pickle.dumps(function_info), True, KV_NAMESPACE_FUNCTION_TABLE
    )
def export_actor_class(self, Class, actor_creation_function_descriptor,
                       actor_method_names):
    """Pickle an actor class and publish its definition.

    Args:
        Class: The actor class to export.
        actor_creation_function_descriptor: Descriptor providing the module
            name, class name, function id and repr of the actor class.
        actor_method_names: Iterable of the actor's method names.

    Raises:
        TypeError: If ``Class`` cannot be pickled.
    """
    descriptor = actor_creation_function_descriptor
    if self._worker.load_code_from_local:
        module_name = descriptor.module_name
        class_name = descriptor.class_name
        # If the class is dynamic, we still export it to GCS
        # even if load_code_from_local is set True.
        found_locally = self.load_function_or_class_from_local(
            module_name, class_name)
        if found_locally is not None:
            return

    # `current_job_id` shouldn't be NIL, unless:
    # 1) This worker isn't an actor;
    # 2) And a previous task started a background thread, which didn't
    #    finish before the task finished, and still uses Ray API
    #    after that.
    assert not self._worker.current_job_id.is_nil(), (
        "You might have started a background thread in a non-actor "
        "task, please make sure the thread finishes before the "
        "task finishes.")
    job_id = self._worker.current_job_id
    key = b":".join((
        b"ActorClass:" + job_id.hex().encode(),
        descriptor.function_id.binary(),
    ))

    try:
        serialized_actor_class = pickle.dumps(Class)
    except TypeError as e:
        msg = (
            "Could not serialize the actor class "
            f"{actor_creation_function_descriptor.repr}. "
            "Check https://docs.ray.io/en/master/serialization.html#troubleshooting "  # noqa
            "for more information.")
        raise TypeError(msg) from e

    # Only the last dotted component of the class name is stored.
    short_class_name = descriptor.class_name.split(".")[-1]
    actor_class_info = {
        "class_name": short_class_name,
        "module": descriptor.module_name,
        "class": serialized_actor_class,
        "job_id": job_id.binary(),
        "collision_identifier": self.compute_collision_identifier(Class),
        "actor_method_names": json.dumps(list(actor_method_names)),
    }

    check_oversized_function(actor_class_info["class"],
                             actor_class_info["class_name"], "actor",
                             self._worker)

    self._publish_actor_class_to_key(key, actor_class_info)
def _do_export(self, remote_function):
    """Pickle a remote function and export it to redis.

    The function is temporarily made visible under its own name in its
    globals (bound to ``remote_function``) so it can reference itself while
    being pickled; the previous state of that global is restored afterwards.
    The pickled function and its metadata are written to a Redis hash and
    the hash key is appended to the ``"Exports"`` list.

    Args:
        remote_function: the RemoteFunction object.
    """
    if self._worker.load_code_from_local:
        return
    # Work around limitations of Python pickling.
    function = remote_function._function
    function_name_global_valid = function.__name__ in function.__globals__
    function_name_global_value = function.__globals__.get(
        function.__name__)
    # Allow the function to reference itself as a global variable
    if not is_cython(function):
        function.__globals__[function.__name__] = remote_function
    try:
        pickled_function = pickle.dumps(function)
    finally:
        # Undo our changes
        if function_name_global_valid:
            function.__globals__[function.__name__] = (
                function_name_global_value)
        else:
            # Use pop() rather than del: for Cython functions nothing was
            # injected above, and a KeyError raised here would also mask
            # any exception coming out of pickle.dumps.
            function.__globals__.pop(function.__name__, None)

    check_oversized_pickle(pickled_function,
                           remote_function._function_name,
                           "remote function", self._worker)
    key = (b"RemoteFunction:" + self._worker.current_job_id.binary() + b":"
           + remote_function._function_descriptor.function_id.binary())
    self._worker.redis_client.hmset(
        key, {
            "job_id": self._worker.current_job_id.binary(),
            "function_id": remote_function._function_descriptor.
            function_id.binary(),
            "name": remote_function._function_name,
            "module": function.__module__,
            "function": pickled_function,
            "max_calls": remote_function._max_calls
        })
    self._worker.redis_client.rpush("Exports", key)
def _publish_actor_class_to_key(self, key, actor_class_info):
    """Store a pickled actor class definition and export its key.

    The definition is written to the GCS internal KV store in the
    function-table namespace. This is factored out as a separate function
    because it is also called on cached actor class definitions when a
    worker connects for the first time.

    Args:
        key: The key to store the actor class info at.
        actor_class_info: Information about the actor class.
    """
    serialized_info = pickle.dumps(actor_class_info)
    # NOTE(review): the positional ``True`` is presumably the overwrite
    # flag of internal_kv_put -- confirm against the GCS client API.
    self._worker.gcs_client.internal_kv_put(
        key, serialized_info, True, KV_NAMESPACE_FUNCTION_TABLE)
    self.export_key(key)
def _serialize_to_pickle5(self, metadata, value):
    """Serialize ``value`` with pickle protocol 5.

    In-band serialization mode is enabled around the dump and out-of-band
    mode is always restored afterwards. Object refs collected during
    serialization are cleared on failure and attached to the result on
    success.

    Args:
        metadata: Metadata passed through to the serialized object.
        value: The value to serialize.

    Returns:
        A ``Pickle5SerializedObject`` holding the metadata, the in-band
        bytes, the buffer writer and the contained object refs.
    """
    buffer_writer = Pickle5Writer()
    # TODO(swang): Check that contained_object_refs is empty.
    try:
        self.set_in_band_serialization()
        in_band = pickle.dumps(
            value,
            protocol=5,
            buffer_callback=buffer_writer.buffer_callback)
    except Exception as e:
        # Drop whatever refs were collected by the failed attempt.
        self.get_and_clear_contained_object_refs()
        raise e
    finally:
        self.set_out_of_band_serialization()

    contained_refs = self.get_and_clear_contained_object_refs()
    return Pickle5SerializedObject(
        metadata, in_band, buffer_writer, contained_refs)
def run(self, mpi_func: Callable, timeout=None) -> Any:
    """Send ``mpi_func`` to every worker and wait for their results.

    Args:
        mpi_func: Callable to run; it is cloudpickled into the request.
        timeout: Passed to the wait on the result's ``done`` event.

    Returns:
        The ``results`` of the current function call.

    Raises:
        Exception: If no function result is available after waiting.
        AssertionError: If not started, or if the number of results does
            not equal ``world_size``.
    """
    assert self.started
    func_request = network_pb2.Function(
        func_id=self.func_id, func=cloudpickle.dumps(mpi_func))

    with self.lock:
        self.func_result = FunctionResults(self.func_id, self.world_size)
        # One RunFunction call per worker; the replies are not used here.
        send = [
            worker_meta.stub.RunFunction(func_request)
            for worker_meta in self.workers
        ]
        self.func_id += 1

    self.func_result.done.wait(timeout)

    with self.lock:
        if not self.func_result:
            raise Exception("function call failed")
        collected = self.func_result.results
        assert len(collected) == self.world_size, "function call failed"
        return self.func_result.results
def _schedule_method(
        self,
        task: ray_client_pb2.ClientTask,
        context=None,
        prepared_args=None) -> ray_client_pb2.ClientTaskTicket:
    """Invoke an actor method remotely and return a ticket for its result.

    Args:
        task: Client task naming the actor (``payload_id``), the method
            (``name``) and its ``args``.
        context: Unused here.
        prepared_args: Passed to ``_convert_args`` together with
            ``task.args``.

    Returns:
        A ``ClientTaskTicket`` whose ``return_ref`` is built from the output
        ref's binary id and its cloudpickled form.

    Raises:
        Exception: If the server holds no handle for the actor.
    """
    actor_handle = self.actor_refs.get(task.payload_id)
    if actor_handle is None:
        raise Exception(
            "Can't run an actor the server doesn't have a handle for")

    call_args = _convert_args(task.args, prepared_args)
    with stash_api_for_tests(self._test_mode):
        remote_method = getattr(actor_handle, task.name)
        output = remote_method.remote(*call_args)
        # Track the ref on the server, keyed by its binary id.
        self.object_refs[output.binary()] = output
        pickled_ref = cloudpickle.dumps(output)

    remote_ref = make_remote_ref(output.binary(), pickled_ref)
    return ray_client_pb2.ClientTaskTicket(return_ref=remote_ref)
def _checkpoint(self) -> None:
    """Checkpoint internal state and write it to the KV store.

    Must be called with ``write_lock`` held. A pickled ``Checkpoint`` of the
    configuration store and actor reconciler is stored under
    ``CHECKPOINT_KEY``. Afterwards, a detached instance exits with
    probability ``_CRASH_AFTER_CHECKPOINT_PROBABILITY``.
    """
    assert self.write_lock.locked()
    logger.debug("Writing checkpoint")
    started_at = time.time()

    snapshot = Checkpoint(self.configuration_store, self.actor_reconciler)
    self.kv_store.put(CHECKPOINT_KEY, pickle.dumps(snapshot))

    elapsed = time.time() - started_at
    logger.debug("Wrote checkpoint in {:.2f}".format(elapsed))

    if random.random() < _CRASH_AFTER_CHECKPOINT_PROBABILITY:
        if self.detached:
            logger.warning("Intentionally crashing after checkpoint")
            os._exit(0)
def __init__(
        self,
        callable_factory: Callable[[], Callable],
        config: StepConfig,
        incoming_edges: Tuple[PipelineNode],
):
    """Store the step definition; the executor is created later.

    Args:
        callable_factory: Zero-argument factory returning the callable; it
            is kept in cloudpickled form.
        config: Configuration of this step.
        incoming_edges: Upstream nodes; must be non-empty.
    """
    # Populated in .deploy().
    self._executor: Executor = None

    self._config: StepConfig = config
    self._incoming_edges: Tuple[PipelineNode] = incoming_edges

    # Serialize to make this class environment-independent.
    serialized_factory = cloudpickle.dumps(callable_factory)
    self._serialized_callable_factory: bytes = serialized_factory

    assert len(self._incoming_edges) > 0
def _checkpoint(self):
    """Checkpoint internal state and write it to the KV store.

    Must be called with ``write_lock`` held. The state tuple is pickled and
    stored under ``CHECKPOINT_KEY``. Afterwards the process exits with
    probability ``_CRASH_AFTER_CHECKPOINT_PROBABILITY``.
    """
    assert self.write_lock.locked()
    logger.debug("Writing checkpoint")
    started_at = time.time()

    # Only the router keys are checkpointed, not the router objects.
    router_names = list(self.routers.keys())
    state = (
        self.routes,
        router_names,
        self.backends,
        self.traffic_policies,
        self.replicas,
        self.replicas_to_start,
        self.replicas_to_stop,
        self.backends_to_remove,
        self.endpoints_to_remove,
    )
    self.kv_store.put(CHECKPOINT_KEY, pickle.dumps(state))

    elapsed = time.time() - started_at
    logger.debug("Wrote checkpoint in {:.2f}".format(elapsed))

    should_crash = random.random() < _CRASH_AFTER_CHECKPOINT_PROBABILITY
    if should_crash:
        logger.warning("Intentionally crashing after checkpoint")
        os._exit(0)
def test_actor_method_metadata_cache(ray_start_regular):
    """Check that ActorHandle round trips reuse the cached method metadata."""

    class Actor(object):
        pass

    # The cache of ActorClassMethodMetadata.
    cache = ray.actor.ActorClassMethodMetadata._cache
    cache.clear()

    def first_entry_ids():
        # Identities of the key and value of the first cache entry.
        first_entry = list(cache.items())[0]
        return [id(part) for part in first_entry]

    # Check cache hit during ActorHandle deserialization.
    A1 = ray.remote(Actor)
    a = A1.remote()
    assert len(cache) == 1
    cached_data_id = first_entry_ids()

    for _ in range(10):
        a = pickle.loads(pickle.dumps(a))

    assert len(ray.actor.ActorClassMethodMetadata._cache) == 1
    assert first_entry_ids() == cached_data_id
def _checkpoint(self) -> None:
    """Checkpoint internal state and write it to the KV store.

    Must be called with ``write_lock`` held. A pickled ``Checkpoint`` of the
    backend state checkpoint and the serializable inflight results is stored
    under ``CHECKPOINT_KEY``. Afterwards, a detached instance exits with
    probability ``_CRASH_AFTER_CHECKPOINT_PROBABILITY``.
    """
    assert self.write_lock.locked()
    logger.debug("Writing checkpoint")
    started_at = time.time()

    snapshot = Checkpoint(
        self.backend_state.checkpoint(),
        self._serializable_inflight_results)
    self.kv_store.put(CHECKPOINT_KEY, pickle.dumps(snapshot))

    elapsed = time.time() - started_at
    logger.debug("Wrote checkpoint in {:.3f}s".format(elapsed))

    if random.random() < _CRASH_AFTER_CHECKPOINT_PROBABILITY:
        if self.detached:
            logger.warning("Intentionally crashing after checkpoint")
            os._exit(0)
def _async_get_object(
        self,
        request: ray_client_pb2.GetRequest,
        client_id: str,
        req_id: int,
        result_queue: queue.Queue,
        context=None) -> Optional[ray_client_pb2.GetResponse]:
    """Attempts to schedule a callback to push the GetResponse to the
    main loop when the desired object is ready. If there is some failure
    in scheduling, a GetResponse will be immediately returned.

    Args:
        request: Request carrying the ``ids`` to fetch.
        client_id: Client whose reference table is consulted.
        req_id: Request id attached to the pushed ``DataResponse``.
        result_queue: Queue receiving the ``DataResponse`` objects.
        context: Unused here.

    Returns:
        ``None`` when the callbacks were scheduled; an invalid
        ``GetResponse`` when an id is unknown (no error attached) or when
        scheduling raised (cloudpickled error attached).
    """
    refs = []
    for rid in request.ids:
        ref = self.object_refs[client_id].get(rid, None)
        if not ref:
            return ray_client_pb2.GetResponse(valid=False)
        refs.append(ref)

    try:
        logger.debug("async get: %s" % refs)
        with disable_client_hook():

            def send_get_response(result: Any) -> None:
                """Pushes a GetResponse to the main DataPath loop to send
                to the client. This is called when the object is ready
                on the server side."""
                try:
                    payload = dumps_from_server(result, client_id, self)
                    reply = ray_client_pb2.GetResponse(
                        valid=True, data=payload)
                except Exception as e:
                    reply = ray_client_pb2.GetResponse(
                        valid=False, error=cloudpickle.dumps(e))

                data_response = ray_client_pb2.DataResponse(
                    get=reply, req_id=req_id)
                data_response.req_id = req_id
                result_queue.put(data_response)

            # The same callback is registered on every requested ref.
            for pending_ref in refs:
                pending_ref._on_completed(send_get_response)
        return None
    except Exception as e:
        return ray_client_pb2.GetResponse(
            valid=False, error=cloudpickle.dumps(e))
def pickle_checkpoint(checkpoint_path: str):
    """Pickles checkpoint data.

    Every file below the checkpoint directory is read as bytes and keyed by
    its path relative to that directory.

    Args:
        checkpoint_path: Path from which the checkpoint directory is
            resolved via ``TrainableUtil.find_checkpoint_dir``.

    Returns:
        Pickled dict with ``checkpoint_name`` (``checkpoint_path`` relative
        to the checkpoint directory, with a trailing separator when it is a
        directory) and ``data`` (relative file path -> file bytes).
    """
    checkpoint_dir = TrainableUtil.find_checkpoint_dir(checkpoint_path)

    file_contents = {}
    for basedir, _, file_names in os.walk(checkpoint_dir):
        for file_name in file_names:
            full_path = os.path.join(basedir, file_name)
            relative_path = os.path.relpath(full_path, checkpoint_dir)
            with open(full_path, "rb") as f:
                file_contents[relative_path] = f.read()

    # Use normpath so that a directory path isn't mapped to empty string.
    name = os.path.relpath(os.path.normpath(checkpoint_path), checkpoint_dir)
    if os.path.isdir(checkpoint_path):
        name += os.path.sep

    return pickle.dumps({
        "checkpoint_name": name,
        "data": file_contents,
    })
def export_actor_class(self, Class, actor_creation_function_descriptor,
                       actor_method_names):
    """Pickle an actor class and publish its definition.

    Args:
        Class: The actor class to export.
        actor_creation_function_descriptor: Descriptor providing the module
            name, class name and function id of the actor class.
        actor_method_names: Iterable of the actor's method names.
    """
    descriptor = actor_creation_function_descriptor
    if self._worker.load_code_from_local:
        module_name = descriptor.module_name
        class_name = descriptor.class_name
        # If the class is dynamic, we still export it to GCS
        # even if load_code_from_local is set True.
        found_locally = self.load_function_or_class_from_local(
            module_name, class_name)
        if found_locally is not None:
            return

    # `current_job_id` shouldn't be NIL, unless:
    # 1) This worker isn't an actor;
    # 2) And a previous task started a background thread, which didn't
    #    finish before the task finished, and still uses Ray API
    #    after that.
    assert not self._worker.current_job_id.is_nil(), (
        "You might have started a background thread in a non-actor "
        "task, please make sure the thread finishes before the "
        "task finishes.")
    job_id = self._worker.current_job_id
    key = b":".join((
        b"ActorClass:" + job_id.binary(),
        descriptor.function_id.binary(),
    ))

    # Only the last dotted component of the class name is stored.
    short_class_name = descriptor.class_name.split(".")[-1]
    actor_class_info = {
        "class_name": short_class_name,
        "module": descriptor.module_name,
        "class": pickle.dumps(Class),
        "job_id": job_id.binary(),
        "collision_identifier": self.compute_collision_identifier(Class),
        "actor_method_names": json.dumps(list(actor_method_names)),
    }

    check_oversized_function(actor_class_info["class"],
                             actor_class_info["class_name"], "actor",
                             self._worker)

    self._publish_actor_class_to_key(key, actor_class_info)
def hash_bucket(annotated_delta_manifests: List[Dict[str, Any]],
                column_names: List[str],
                primary_keys: List[str],
                sort_keys: List[str],
                num_buckets: int,
                num_groups: int,
                deltacat_storage=unimplemented_deltacat_storage):
    """Group delta file envelopes by hash bucket group and put them in Ray.

    Args:
        annotated_delta_manifests: Manifests to group.
        column_names: Forwarded to the grouping helper.
        primary_keys: Forwarded to the grouping helper.
        sort_keys: Forwarded to the grouping helper.
        num_buckets: Number of hash buckets.
        num_groups: Number of hash bucket groups; bucket ``i`` belongs to
            group ``i % num_groups``.
        deltacat_storage: Storage implementation forwarded to the helper.

    Returns:
        ``(hash_bucket_group_to_obj_id, object_refs)`` where the first is an
        object array of length ``num_groups`` holding the cloudpickled
        object ref of each populated group, and the second lists those
        object refs.
        NOTE(review): when the grouping helper returns ``None`` only the
        bare array is returned, not a 2-tuple -- confirm callers handle
        both shapes.
    """
    logger.info("Starting hash bucket task...")
    hash_bucket_group_to_obj_id = np.empty([num_groups], dtype="object")

    delta_file_envelope_groups = group_file_records_by_pk_hash_bucket(
        annotated_delta_manifests,
        num_buckets,
        column_names,
        primary_keys,
        sort_keys,
        deltacat_storage,
    )
    if delta_file_envelope_groups is None:
        return hash_bucket_group_to_obj_id

    # write grouped output data to files including the group name
    hb_group_to_delta_file_envelopes = np.empty([num_groups], dtype="object")
    for hb_index, bucket_envelopes in enumerate(delta_file_envelope_groups):
        if not bucket_envelopes:
            continue
        hb_group = hb_index % num_groups
        if hb_group_to_delta_file_envelopes[hb_group] is None:
            # Each group slot is allocated on first use, indexed by bucket.
            hb_group_to_delta_file_envelopes[hb_group] = np.empty(
                [num_buckets], dtype="object")
        hb_group_to_delta_file_envelopes[hb_group][hb_index] = \
            bucket_envelopes

    object_refs = []
    for hb_group, group_envelopes in enumerate(
            hb_group_to_delta_file_envelopes):
        if group_envelopes is None:
            continue
        obj_ref = ray.put(group_envelopes)
        object_refs.append(obj_ref)
        hash_bucket_group_to_obj_id[hb_group] = cloudpickle.dumps(obj_ref)

    logger.info("Finished hash bucket task...")
    return hash_bucket_group_to_obj_id, object_refs
def send(signal):
    """Send signal.

    The signal has a unique identifier that is computed from (1)
    the id of the actor or task sending this signal (i.e., the actor or
    task calling this function), and (2) an index that is incremented every
    time this source sends a signal. This index starts from 1.

    The signal is cloudpickled, hex-encoded and appended with ``XADD`` to
    the Redis stream named after the source.

    Args:
        signal: Signal to be sent.
    """
    worker = ray.worker.global_worker
    # A nil actor id means we are in a task, so the task id is the source.
    if not worker.actor_id.is_nil():
        source_key = ray.worker.global_worker.actor_id.hex()
    else:
        source_key = ray.worker.global_worker.current_task_id.hex()

    encoded_signal = ray.utils.binary_to_hex(cloudpickle.dumps(signal))
    command = "XADD " + source_key + " * signal " + encoded_signal
    ray.worker.global_worker.redis_client.execute_command(command)
def _fetch_metadata_remotely(
    pieces: List["pyarrow._dataset.ParquetFileFragment"],
) -> List[ObjectRef["pyarrow.parquet.FileMetaData"]]:
    """Fetch Parquet metadata for ``pieces`` using remote tasks.

    The pieces are split into chunks; each chunk is cloudpickled and passed
    to a remote metadata-fetch task, and the per-chunk results are
    flattened into one list.

    Args:
        pieces: The Parquet file fragments to fetch metadata for.

    Returns:
        The flattened results of all remote fetch tasks.
    """
    from ray import cloudpickle

    remote_fetch_metadata = cached_remote_fn(_fetch_metadata_serialization_wrapper)
    metas = []
    # Always use at least one chunk: np.array_split raises ValueError for
    # zero sections, which would happen whenever there are fewer than
    # PIECES_PER_META_FETCH pieces.
    parallelism = max(1, min(len(pieces) // PIECES_PER_META_FETCH, 100))
    meta_fetch_bar = ProgressBar("Metadata Fetch Progress", total=parallelism)
    try:
        # Custom fragment serialization is only registered while the chunks
        # are being pickled, and is always deregistered afterwards.
        _register_parquet_file_fragment_serialization()
        for pcs in np.array_split(pieces, parallelism):
            if len(pcs) == 0:
                continue
            metas.append(remote_fetch_metadata.remote(cloudpickle.dumps(pcs)))
    finally:
        _deregister_parquet_file_fragment_serialization()
    metas = meta_fetch_bar.fetch_until_complete(metas)
    return list(itertools.chain.from_iterable(metas))
def _do_export(self, remote_function):
    """Pickle a remote function and export it to redis.

    The function is temporarily made visible under its own name in its
    globals (bound to ``remote_function``) so it can reference itself while
    being pickled; the previous state of that global is restored afterwards.
    The pickled function and its metadata are written to a Redis hash and
    the hash key is appended to the ``"Exports"`` list.

    Args:
        remote_function: the RemoteFunction object.
    """
    if self._worker.load_code_from_local:
        return
    # Work around limitations of Python pickling.
    function = remote_function._function
    function_name_global_valid = function.__name__ in function.__globals__
    function_name_global_value = function.__globals__.get(
        function.__name__)
    # Allow the function to reference itself as a global variable
    if not is_cython(function):
        function.__globals__[function.__name__] = remote_function
    try:
        pickled_function = pickle.dumps(function)
    finally:
        # Undo our changes
        if function_name_global_valid:
            function.__globals__[function.__name__] = (
                function_name_global_value)
        else:
            # Use pop() rather than del: for Cython functions nothing was
            # injected above, and a KeyError raised here would also mask
            # any exception coming out of pickle.dumps.
            function.__globals__.pop(function.__name__, None)

    check_oversized_pickle(pickled_function,
                           remote_function._function_name,
                           "remote function", self._worker)
    key = (b"RemoteFunction:" + self._worker.task_driver_id.binary() + b":"
           + remote_function._function_descriptor.function_id.binary())
    self._worker.redis_client.hmset(
        key, {
            "driver_id": self._worker.task_driver_id.binary(),
            "function_id": remote_function._function_descriptor.
            function_id.binary(),
            "name": remote_function._function_name,
            "module": function.__module__,
            "function": pickled_function,
            "max_calls": remote_function._max_calls
        })
    self._worker.redis_client.rpush("Exports", key)
def send(signal):
    """Send signal.

    The signal has a unique identifier that is computed from (1)
    the id of the actor or task sending this signal (i.e., the actor or
    task calling this function), and (2) an index that is incremented every
    time this source sends a signal. This index starts from 1.

    The signal is cloudpickled, hex-encoded and appended with ``XADD`` to
    the Redis stream named after the source.

    Args:
        signal: Signal to be sent.
    """
    in_actor = hasattr(ray.worker.global_worker, "actor_creation_task_id")
    # No actors means this function must have been called from a task.
    source_key = (ray.worker.global_worker.actor_id.hex() if in_actor else
                  ray.worker.global_worker.current_task_id.hex())

    encoded_signal = ray.utils.binary_to_hex(cloudpickle.dumps(signal))
    command = "XADD " + source_key + " * signal " + encoded_signal
    ray.worker.global_worker.redis_client.execute_command(command)
def __getstate__(self):
    """Memento generator for Trial.

    Flushes the result logger, if any.
    Note this can only occur if the trial holds a DISK checkpoint.

    Returns:
        A deep copy of the instance ``__dict__`` in which ``resources`` is
        converted via ``resources_to_json``, every field listed in
        ``_nonjson_fields`` is cloudpickled and hex-encoded,
        ``runner``/``result_logger`` are cleared, and
        ``__logger_started__`` records whether a result logger was present.
    """
    assert self._checkpoint.storage == Checkpoint.DISK, (
        "Checkpoint must not be in-memory.")

    snapshot = self.__dict__.copy()
    snapshot["resources"] = resources_to_json(self.resources)

    for field_name in self._nonjson_fields:
        pickled_field = cloudpickle.dumps(snapshot.get(field_name))
        snapshot[field_name] = binary_to_hex(pickled_field)

    # Live handles are not part of the saved state.
    snapshot["runner"] = None
    snapshot["result_logger"] = None

    logger_present = bool(self.result_logger)
    if logger_present:
        self.result_logger.flush()
    snapshot["__logger_started__"] = logger_present

    return copy.deepcopy(snapshot)
def export_actor_class(self, Class, actor_method_names):
    """Pickle an actor class and publish its definition.

    Nothing is exported when code is loaded locally.

    Args:
        Class: The actor class to export.
        actor_method_names: Iterable of the actor's method names.
    """
    worker = self._worker
    if worker.load_code_from_local:
        return

    function_descriptor = FunctionDescriptor.from_class(Class)
    # `task_driver_id` shouldn't be NIL, unless:
    # 1) This worker isn't an actor;
    # 2) And a previous task started a background thread, which didn't
    #    finish before the task finished, and still uses Ray API
    #    after that.
    assert not worker.task_driver_id.is_nil(), (
        "You might have started a background thread in a non-actor task, "
        "please make sure the thread finishes before the task finishes.")
    driver_id = worker.task_driver_id
    key = b":".join((
        b"ActorClass:" + driver_id.binary(),
        function_descriptor.function_id.binary(),
    ))

    actor_class_info = {
        "class_name": Class.__name__,
        "module": Class.__module__,
        "class": pickle.dumps(Class),
        "driver_id": driver_id.binary(),
        "actor_method_names": json.dumps(list(actor_method_names)),
    }

    check_oversized_pickle(actor_class_info["class"],
                           actor_class_info["class_name"], "actor",
                           self._worker)

    if self._worker.mode is not None:
        self._publish_actor_class_to_key(key, actor_class_info)
        return

    # This means that 'ray.init()' has not been called yet and so we
    # must cache the actor class definition and export it when
    # 'ray.init()' is called.
    assert self._actors_to_export is not None
    self._actors_to_export.append((key, actor_class_info))
    # This caching code path is currently not used because we only
    # export actor class definitions lazily when we instantiate the
    # actor for the first time.
    assert False, "This should be unreachable."
def register_actor(name, actor_handle):
    """Register a named actor under a string key.

    Args:
        name: The name of the named actor.
        actor_handle: The actor object to be associated with this name

    Raises:
        TypeError: If ``name`` is not a string or ``actor_handle`` is not an
            ``ActorHandle``.
        ValueError: If an actor is already registered under ``name``.
    """
    if not isinstance(name, str):
        raise TypeError("The name argument must be a string.")
    handle_is_valid = isinstance(actor_handle, ray.actor.ActorHandle)
    if not handle_is_valid:
        raise TypeError("The actor_handle argument must be an ActorHandle "
                        "object.")

    actor_name = _calculate_key(name)
    pickled_state = pickle.dumps(actor_handle)

    # Add the actor to Redis if it does not already exist.
    already_exists = _internal_kv_put(actor_name, pickled_state)
    if not already_exists:
        return

    # If the registration fails, then erase the new actor handle that
    # was added when pickling the actor handle.
    actor_handle._ray_new_actor_handles.pop()
    raise ValueError(
        "Error: the actor with name={} already exists".format(name))
def serialize(self):
    """Return this object pickled to bytes."""
    pickled_self = pickle.dumps(self)
    return pickled_self