async def dump_to_storage(
    paths: List[str],
    obj: Any,
    workflow_id: str,
    storage: storage.Storage,
    update_existing=True,
) -> None:
    """Serializes and puts an arbitrary object, handling references.

    The object will be uploaded at `paths`. Any object references it contains
    will be uploaded to their global, remote storage.

    Args:
        paths: The location to put the object.
        obj: The object to serialize. If it contains object references, those
            will be serialized too.
        workflow_id: The workflow id.
        storage: The storage to use. If obj contains object references,
            `storage.put` will be called on them individually.
        update_existing: If False, the object will not be uploaded if the
            path exists.
    """
    if not update_existing:
        prefix = storage.make_key(*paths[:-1])
        scan_result = await storage.scan_prefix(prefix)
        if paths[-1] in scan_result:
            return

    tasks = []

    # NOTE: Cloudpickle doesn't support private dispatch tables, so we extend
    # the cloudpickler instead to avoid changing cloudpickle's global dispatch
    # table which is shared with `ray.put`. See
    # https://github.com/cloudpipe/cloudpickle/issues/437
    class ObjectRefPickler(cloudpickle.CloudPickler):
        _object_ref_reducer = {
            ray.ObjectRef: lambda ref: _reduce_objectref(
                workflow_id, storage, ref, tasks)
        }
        dispatch_table = ChainMap(_object_ref_reducer,
                                  cloudpickle.CloudPickler.dispatch_table)
        dispatch = dispatch_table

    key = storage.make_key(*paths)

    # TODO(Alex): We should be able to do this without the extra buffer.
    with io.BytesIO() as f:
        pickler = ObjectRefPickler(f)
        pickler.dump(obj)
        f.seek(0)
        task = storage.put(key, f.read())
        tasks.append(task)

    await asyncio.gather(*tasks)
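
# Illustrative usage sketch (not part of the original module): the path layout
# and step id below are hypothetical; `dump_to_storage` does not prescribe a
# key schema. Any ray.ObjectRef nested inside `obj` is intercepted by
# ObjectRefPickler and uploaded through its own `storage.put` call, while the
# main payload is written under `paths`.
async def _example_dump_step_output(result: Any, workflow_id: str,
                                    store: storage.Storage) -> None:
    paths = ["workflows", workflow_id, "steps", "example_step", "output"]
    # update_existing=False skips the upload if the key already exists.
    await dump_to_storage(paths, result, workflow_id, store,
                          update_existing=False)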
def _load_object_ref(paths: List[str],
                     storage: storage.Storage) -> ray.ObjectRef:
    """Loads the object stored at `paths` as a Ray object reference.

    When the global `_object_cache` is active, results are memoized by key so
    that repeated loads of the same object return the same ObjectRef instead
    of scheduling another `_load_ref_helper` task.
    """
    global _object_cache
    key = storage.make_key(*paths)
    if _object_cache is None:
        return _load_ref_helper.remote(key, storage)

    if key not in _object_cache:
        _object_cache[key] = _load_ref_helper.remote(key, storage)
    return _object_cache[key]
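
# Illustrative usage sketch (not part of the original module; the path below
# is hypothetical). While `_object_cache` is active, loading the same key
# twice returns the identical ObjectRef rather than scheduling a second
# `_load_ref_helper` task.
def _example_load_twice(store: storage.Storage) -> ray.ObjectRef:
    paths = ["workflows", "example_workflow", "objects", "example_object_id"]
    first = _load_object_ref(paths, store)
    second = _load_object_ref(paths, store)
    # With the cache active both calls hit the same cache entry; without it,
    # each call schedules its own load task.
    assert _object_cache is None or first is second
    # Resolve the reference with ray.get() when the value itself is needed.
    return first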