Example #1
import asyncio
import io
from collections import ChainMap
from typing import Any, List

import ray
from ray import cloudpickle

# `storage` and the `_reduce_objectref` helper are defined elsewhere in the
# workflow module this snippet is taken from.


async def dump_to_storage(
    paths: List[str],
    obj: Any,
    workflow_id: str,
    storage: storage.Storage,
    update_existing: bool = True,
) -> None:
    """Serializes and puts arbitrary object, handling references. The object will
        be uploaded at `paths`. Any object references will be uploaded to their
        global, remote storage.

    Args:
        paths: The location to put the object.
        obj: The object to serialize. If it contains object references, those
                will be serialized too.
        workflow_id: The workflow id.
        storage: The storage to use. If obj contains object references,
                `storage.put` will be called on them individually.
        update_existing: If False, the object will not be uploaded if the path
                exists.
    """
    if not update_existing:
        prefix = storage.make_key(*paths[:-1])
        scan_result = await storage.scan_prefix(prefix)
        if paths[-1] in scan_result:
            return

    tasks = []

    # NOTE: Cloudpickle doesn't support private dispatch tables, so we extend
    # the cloudpickler instead to avoid changing cloudpickle's global dispatch
    # table which is shared with `ray.put`. See
    # https://github.com/cloudpipe/cloudpickle/issues/437
    class ObjectRefPickler(cloudpickle.CloudPickler):
        _object_ref_reducer = {
            ray.ObjectRef:
            lambda ref: _reduce_objectref(workflow_id, storage, ref, tasks)
        }
        dispatch_table = ChainMap(_object_ref_reducer,
                                  cloudpickle.CloudPickler.dispatch_table)
        # Keep the legacy `dispatch` attribute in sync for cloudpickle
        # versions that still consult it instead of `dispatch_table`.
        dispatch = dispatch_table

    key = storage.make_key(*paths)

    # TODO(Alex): We should be able to do this without the extra buffer.
    with io.BytesIO() as f:
        pickler = ObjectRefPickler(f)
        pickler.dump(obj)
        f.seek(0)
        task = storage.put(key, f.read())
        tasks.append(task)

    await asyncio.gather(*tasks)
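
For context, here is a minimal sketch of calling `dump_to_storage` with a hypothetical in-memory stand-in for `storage.Storage` that implements only the `make_key`, `scan_prefix`, and `put` methods the function relies on; the class and setup below are illustrative assumptions, not Ray's actual storage API:

class InMemoryStorage:
    """Hypothetical stand-in for storage.Storage (illustration only)."""

    def __init__(self):
        self._data = {}

    def make_key(self, *names: str) -> str:
        return "/".join(names)

    async def scan_prefix(self, prefix: str):
        # Return the final path component of every key under `prefix`.
        return [k.rsplit("/", 1)[-1] for k in self._data if k.startswith(prefix)]

    async def put(self, key: str, value: bytes) -> None:
        self._data[key] = value


async def main():
    store = InMemoryStorage()
    # No ObjectRefs inside the payload, so only the top-level blob is put.
    await dump_to_storage(["workflows", "wf_1", "output"], {"answer": 42},
                          "wf_1", store, update_existing=False)


asyncio.run(main())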
Example #2
def _load_object_ref(paths: List[str],
                     storage: storage.Storage) -> ray.ObjectRef:
    """Load the object stored at `paths` as an ObjectRef, memoized per key.

    `_object_cache` and the `_load_ref_helper` remote task are module-level
    names defined elsewhere in the same workflow module.
    """
    global _object_cache
    key = storage.make_key(*paths)
    if _object_cache is None:
        return _load_ref_helper.remote(key, storage)

    if key not in _object_cache:
        _object_cache[key] = _load_ref_helper.remote(key, storage)

    return _object_cache[key]
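
A minimal usage sketch of the memoization, assuming the module-level `_object_cache` dict has been initialized elsewhere and `store` is a `storage.Storage` instance (the paths and setup are illustrative):

_object_cache = {}  # normally managed by the surrounding workflow module

ref_a = _load_object_ref(["workflows", "wf_1", "objects", "obj_0"], store)
ref_b = _load_object_ref(["workflows", "wf_1", "objects", "obj_0"], store)
assert ref_a is ref_b  # repeated loads of the same key reuse one ObjectRef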