Example #1
 def trial_info(self, trial):
     if trial.last_result:
         result = trial.last_result.copy()
     else:
         result = None
     info_dict = {
         "id": trial.trial_id,
         "trainable_name": trial.trainable_name,
         "config": binary_to_hex(cloudpickle.dumps(trial.config)),
         "status": trial.status,
         "result": binary_to_hex(cloudpickle.dumps(result))
     }
     return info_dict
Example #2
File: trial.py Project: jamescasbon/ray
    def __getstate__(self):
        """Memento generator for Trial.

        Sets RUNNING trials to PENDING, and flushes the result logger.
        Note this can only occur if the trial holds a DISK checkpoint.
        """
        assert self._checkpoint.storage == Checkpoint.DISK, (
            "Checkpoint must not be in-memory.")
        state = self.__dict__.copy()
        state["resources"] = resources_to_json(self.resources)

        pickle_data = {
            "_checkpoint": self._checkpoint,
            "config": self.config,
            "custom_loggers": self.custom_loggers,
            "sync_function": self.sync_function,
            "last_result": self.last_result
        }

        for key, value in pickle_data.items():
            state[key] = binary_to_hex(cloudpickle.dumps(value))

        state["runner"] = None
        state["result_logger"] = None
        if self.status == Trial.RUNNING:
            state["status"] = Trial.PENDING
        if self.result_logger:
            self.result_logger.flush()
            state["__logger_started__"] = True
        else:
            state["__logger_started__"] = False
        return copy.deepcopy(state)
Example #3
File: registry.py Project: jamescasbon/ray
 def register(self, category, key, value):
     if category not in KNOWN_CATEGORIES:
         from ray.tune import TuneError
         raise TuneError("Unknown category {} not among {}".format(
             category, KNOWN_CATEGORIES))
     self._to_flush[(category, key)] = pickle.dumps(value)
     if _internal_kv_initialized():
         self.flush_values()
Example #4
def set_flushing_policy(flushing_policy):
    """Serialize this policy for Monitor to pick up."""
    if "RAY_USE_NEW_GCS" not in os.environ:
        raise Exception(
            "set_flushing_policy() is only available when environment "
            "variable RAY_USE_NEW_GCS is present at both compile and run time."
        )
    ray.worker.global_worker.check_connected()
    redis_client = ray.worker.global_worker.redis_client

    serialized = pickle.dumps(flushing_policy)
    redis_client.set("gcs_flushing_policy", serialized)
Example #5
 def _get_object(self, request, client_id: str, context=None):
     if request.id not in self.object_refs[client_id]:
         return ray_client_pb2.GetResponse(valid=False)
     objectref = self.object_refs[client_id][request.id]
     logger.debug("get: %s" % objectref)
     try:
         with disable_client_hook():
             item = ray.get(objectref, timeout=request.timeout)
     except Exception as e:
         return ray_client_pb2.GetResponse(
             valid=False, error=cloudpickle.dumps(e))
     item_ser = dumps_from_server(item, client_id, self)
     return ray_client_pb2.GetResponse(valid=True, data=item_ser)
Example #6
File: registry.py Project: LaoKpa/ray
    def register(self, category, key, value):
        """Registers the value with the global registry.

        Raises:
            PicklingError: If unable to pickle the provided value.
        """
        if category not in KNOWN_CATEGORIES:
            from ray.tune import TuneError
            raise TuneError("Unknown category {} not among {}".format(
                category, KNOWN_CATEGORIES))
        self._to_flush[(category, key)] = pickle.dumps(value)
        if _internal_kv_initialized():
            self.flush_values()
Example #7
def _try_to_compute_deterministic_class_id(cls, depth=5):
    """Attempt to produce a deterministic class ID for a given class.

    The goal here is for the class ID to be the same when this is run on
    different worker processes. Pickling, loading, and pickling again seems to
    produce more consistent results than simply pickling. This is a bit crazy
    and could cause problems, in which case we should revert it and figure out
    something better.

    Args:
        cls: The class to produce an ID for.
        depth: The number of times to repeatedly try to load and dump the
            string while trying to reach a fixed point.

    Returns:
        A class ID for this class. We attempt to make the class ID the same
            when this function is run on different workers, but that is not
            guaranteed.

    Raises:
        Exception: This could raise an exception if cloudpickle raises an
            exception.
    """
    # Pickling, loading, and pickling again seems to produce more consistent
    # results than simply pickling. This is a bit crazy and could cause
    # problems, in which case we should revert it and figure out something
    # better.
    class_id = pickle.dumps(cls)
    for _ in range(depth):
        new_class_id = pickle.dumps(pickle.loads(class_id))
        if new_class_id == class_id:
            # We appear to have reached a fix point, so use this as the ID.
            return hashlib.sha1(new_class_id).digest()
        class_id = new_class_id

    # We have not reached a fixed point, so we may end up with a different
    # class ID for this custom class on each worker, which could lead to the
    # same class definition being exported many many times.
    logger.warning(
        f"WARNING: Could not produce a deterministic class ID for class {cls}")
    return hashlib.sha1(new_class_id).digest()
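The fixed-point idea above is easy to demonstrate on its own. A minimal sketch of the re-serialize-until-stable loop, using stdlib pickle on an arbitrary payload instead of cloudpickle on a class (the helper name is illustrative, not part of Ray):

import hashlib
import pickle

def stable_serialized_id(payload, depth=5):
    # Serialize, then repeatedly load and re-serialize until the bytes stop
    # changing, and hash whatever we end up with.
    blob = pickle.dumps(payload)
    for _ in range(depth):
        new_blob = pickle.dumps(pickle.loads(blob))
        if new_blob == blob:
            break
        blob = new_blob
    return hashlib.sha1(blob).hexdigest()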
Example #8
File: server.py Project: stjordanis/ray
 def _get_object(self, request: ray_client_pb2.GetRequest, client_id: str):
     objectrefs = []
     for rid in request.ids:
         ref = self.object_refs[client_id].get(rid, None)
         if ref:
             objectrefs.append(ref)
         else:
             return ray_client_pb2.GetResponse(
                 valid=False,
                 error=cloudpickle.dumps(
                     ValueError(
                         f"ClientObjectRef {rid} is not found for client "
                         f"{client_id}")))
     try:
         logger.debug("get: %s" % objectrefs)
         with disable_client_hook():
             items = ray.get(objectrefs, timeout=request.timeout)
     except Exception as e:
         return ray_client_pb2.GetResponse(
             valid=False, error=cloudpickle.dumps(e))
     serialized = dumps_from_server(items, client_id, self)
     return ray_client_pb2.GetResponse(valid=True, data=serialized)
Example #9
 def _get_object(self, request: ray_client_pb2.GetRequest, client_id: str):
     objectrefs = []
     for rid in request.ids:
         ref = self.object_refs[client_id].get(rid, None)
         if ref:
             objectrefs.append(ref)
         else:
             yield ray_client_pb2.GetResponse(
                 valid=False,
                 error=cloudpickle.dumps(
                     ValueError(
                         f"ClientObjectRef {rid} is not found for client "
                         f"{client_id}"
                     )
                 ),
             )
             return
     try:
         logger.debug("get: %s" % objectrefs)
         with disable_client_hook():
             items = ray.get(objectrefs, timeout=request.timeout)
     except Exception as e:
         yield ray_client_pb2.GetResponse(valid=False, error=cloudpickle.dumps(e))
         return
     serialized = dumps_from_server(items, client_id, self)
     total_size = len(serialized)
     assert total_size > 0, "Serialized object cannot be zero bytes"
     total_chunks = math.ceil(total_size / OBJECT_TRANSFER_CHUNK_SIZE)
     for chunk_id in range(request.start_chunk_id, total_chunks):
         start = chunk_id * OBJECT_TRANSFER_CHUNK_SIZE
         end = min(total_size, (chunk_id + 1) * OBJECT_TRANSFER_CHUNK_SIZE)
         yield ray_client_pb2.GetResponse(
             valid=True,
             data=serialized[start:end],
             chunk_id=chunk_id,
             total_chunks=total_chunks,
             total_size=total_size,
         )
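For context, the chunked stream above could be reassembled on the receiving side roughly as follows. This is a hedged sketch, not the actual Ray client code, and it assumes every response in the stream is a valid data chunk starting from chunk_id 0:

def assemble_chunks(responses):
    # Order the chunks, concatenate their payloads, and sanity-check the size.
    chunks = sorted(responses, key=lambda r: r.chunk_id)
    payload = b"".join(r.data for r in chunks)
    assert len(payload) == chunks[0].total_size
    return payload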
Example #10
def calculate_identifier(obj: Any) -> str:
    """Calculate a url-safe identifier for an object."""

    # Step 1: Serialize the object.
    # Step 2: Calculate its sha256 hash.
    # Step 3: Get the url safe, base64 representation of it.

    # TODO (Alex): Ideally we should use the existing ObjectRef serializer to
    # avoid duplicate serialization passes and support nested object refs.
    m = hashlib.sha256()
    m.update(cloudpickle.dumps(obj))
    hash = m.digest()
    encoded = base64.urlsafe_b64encode(hash).decode("ascii")
    return encoded
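A quick usage sketch (hypothetical values, not from the Ray source): structurally identical payloads serialize to the same bytes, so they map to the same identifier.

ident_a = calculate_identifier({"lr": 0.01, "layers": [64, 64]})
ident_b = calculate_identifier({"lr": 0.01, "layers": [64, 64]})
assert ident_a == ident_b  # same bytes -> same sha256 -> same identifier
print(len(ident_a))        # 44 characters: url-safe base64 of a 32-byte digest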
Example #11
    def _checkpoint(self):
        """Checkpoint internal state and write it to the KV store."""
        logger.debug("Writing checkpoint")
        start = time.time()
        checkpoint = pickle.dumps(
            (self.routes, self.backends, self.traffic_policies, self.replicas,
             self.replicas_to_start, self.replicas_to_stop))

        self.kv_store_client.put("checkpoint", checkpoint)
        logger.debug("Wrote checkpoint in {:.2f}".format(time.time() - start))

        if random.random() < _CRASH_AFTER_CHECKPOINT_PROBABILITY:
            logger.warning("Intentionally crashing after checkpoint")
            os._exit(0)
Example #12
File: server.py Project: wuisawesome/ray
 def send_get_response(result: Any) -> None:
     """Pushes a GetResponse to the main DataPath loop to send
     to the client. This is called when the object is ready
     on the server side."""
     try:
         serialized = dumps_from_server(result, client_id, self)
         get_resp = ray_client_pb2.GetResponse(valid=True,
                                               data=serialized)
     except Exception as exc:
         get_resp = ray_client_pb2.GetResponse(
             valid=False, error=cloudpickle.dumps(exc))
     resp = ray_client_pb2.DataResponse(get=get_resp,
                                        req_id=req_id)
     result_queue.put(resp)
Example #13
File: common.py Project: haochihlin/ray
def _hash(obj: Any) -> bytes:
    """
    Calculates a sha256 hash of an object.
    """
    # TODO (Alex): Ideally we shouldn't let ray serialize obj to begin with.
    # (1) It would save us an unnecessary serialization/deserialization. (2)
    # Cloudpickle isn't always stable, so for some objects (e.g.
    # functions) this could produce inconsistent results.
    m = hashlib.sha256()
    # TODO (Alex): We should handle the nested ObjectRef case. This naive
    # cloudpickle.dumps will return a different hash when run on the
    # recovered version of an object.
    m.update(cloudpickle.dumps(obj))
    return m.digest()
Example #14
    def serialize(self, value):
        """Serialize an object.

        Args:
            value: The value to serialize.
        """
        if isinstance(value, bytes):
            # If the object is a byte array, skip serializing it and
            # use a special metadata to indicate it's raw binary. So
            # that this object can also be read by Java.
            return RawSerializedObject(value)

        if self.worker.use_pickle:
            writer = Pickle5Writer()
            if ray.cloudpickle.FAST_CLOUDPICKLE_USED:
                inband = pickle.dumps(value,
                                      protocol=5,
                                      buffer_callback=writer.buffer_callback)
            else:
                inband = pickle.dumps(value)
            return Pickle5SerializedObject(inband, writer)
        else:
            try:
                serialized_value = self._store_and_register_pyarrow(value)
            except TypeError:
                # TypeError can happen because one of the members of the object
                # may not be serializable for cloudpickle. So we need
                # these extra fallbacks here to start from the beginning.
                # Hopefully the object could have a `__reduce__` method.
                self.register_custom_serializer(type(value), use_pickle=True)
                logger.warning("WARNING: Serializing the class {} failed, "
                               "falling back to cloudpickle.".format(
                                   type(value)))
                serialized_value = self._store_and_register_pyarrow(value)

            return ArrowSerializedObject(serialized_value)
Example #15
    def _real_size(self, item: Any) -> int:
        is_client = ray.util.client.ray.is_connected()
        # In client mode, fallback to using Ray cloudpickle instead of the
        # real serializer.
        if is_client:
            return len(cloudpickle.dumps(item))

        # We're using an internal Ray API, and have to ensure it's
        # initialized by calling a public API.
        global _ray_initialized
        if not _ray_initialized:
            _ray_initialized = True
            ray.put(None)
        return (ray.worker.global_worker.get_serialization_context().serialize(
            item).total_bytes)
Example #16
    def test_replica_config_lazy_deserialization(self):
        def f():
            return "Check this out!"

        f_serialized = cloudpickle.dumps(f)
        config = ReplicaConfig(
            "f", f_serialized, cloudpickle.dumps(()), cloudpickle.dumps({}), {}
        )

        assert config.serialized_deployment_def == f_serialized
        assert config._deployment_def is None

        assert config.serialized_init_args == cloudpickle.dumps(tuple())
        assert config._init_args is None

        assert config.serialized_init_kwargs == cloudpickle.dumps(dict())
        assert config._init_kwargs is None

        assert isinstance(config.ray_actor_options, dict)
        assert isinstance(config.resource_dict, dict)

        assert config.deployment_def() == "Check this out!"
        assert config.init_args == tuple()
        assert config.init_kwargs == dict()
Example #17
    def export(self, remote_function):
        """Pickle a remote function and export it to redis.
        Args:
            remote_function: the RemoteFunction object.
        """
        if self._worker.load_code_from_local:
            function_descriptor = remote_function._function_descriptor
            module_name, function_name = (
                function_descriptor.module_name,
                function_descriptor.function_name,
            )
            # If the function is dynamic, we still export it to GCS
            # even if load_code_from_local is set True.
            if (
                self.load_function_or_class_from_local(module_name, function_name)
                is not None
            ):
                return
        function = remote_function._function
        pickled_function = remote_function._pickled_function

        check_oversized_function(
            pickled_function,
            remote_function._function_name,
            "remote function",
            self._worker,
        )
        key = make_function_table_key(
            b"RemoteFunction",
            self._worker.current_job_id,
            remote_function._function_descriptor.function_id.binary(),
        )
        if self._worker.gcs_client.internal_kv_exists(key, KV_NAMESPACE_FUNCTION_TABLE):
            return
        val = pickle.dumps(
            {
                "job_id": self._worker.current_job_id.binary(),
                "function_id": remote_function._function_descriptor.function_id.binary(),  # noqa: E501
                "function_name": remote_function._function_name,
                "module": function.__module__,
                "function": pickled_function,
                "collision_identifier": self.compute_collision_identifier(function),
                "max_calls": remote_function._max_calls,
            }
        )
        self._worker.gcs_client.internal_kv_put(
            key, val, True, KV_NAMESPACE_FUNCTION_TABLE
        )
Example #18
    def export_actor_class(self, Class, actor_creation_function_descriptor,
                           actor_method_names):
        if self._worker.load_code_from_local:
            module_name, class_name = (
                actor_creation_function_descriptor.module_name,
                actor_creation_function_descriptor.class_name)
            # If the class is dynamic, we still export it to GCS
            # even if load_code_from_local is set True.
            if self.load_function_or_class_from_local(module_name,
                                                      class_name) is not None:
                return

        # `current_job_id` shouldn't be NIL, unless:
        # 1) This worker isn't an actor;
        # 2) And a previous task started a background thread, which didn't
        #    finish before the task finished, and still uses Ray API
        #    after that.
        assert not self._worker.current_job_id.is_nil(), (
            "You might have started a background thread in a non-actor "
            "task, please make sure the thread finishes before the "
            "task finishes.")
        job_id = self._worker.current_job_id
        key = (b"ActorClass:" + job_id.hex().encode() + b":" +
               actor_creation_function_descriptor.function_id.binary())
        try:
            serialized_actor_class = pickle.dumps(Class)
        except TypeError as e:
            msg = (
                "Could not serialize the actor class "
                f"{actor_creation_function_descriptor.repr}. "
                "Check https://docs.ray.io/en/master/serialization.html#troubleshooting "  # noqa
                "for more information.")
            raise TypeError(msg) from e
        actor_class_info = {
            "class_name": actor_creation_function_descriptor.class_name.split(
                ".")[-1],
            "module": actor_creation_function_descriptor.module_name,
            "class": serialized_actor_class,
            "job_id": job_id.binary(),
            "collision_identifier": self.compute_collision_identifier(Class),
            "actor_method_names": json.dumps(list(actor_method_names))
        }

        check_oversized_function(actor_class_info["class"],
                                 actor_class_info["class_name"], "actor",
                                 self._worker)

        self._publish_actor_class_to_key(key, actor_class_info)
Example #19
    def _do_export(self, remote_function):
        """Pickle a remote function and export it to redis.

        Args:
            remote_function: the RemoteFunction object.
        """
        if self._worker.load_code_from_local:
            return
        # Work around limitations of Python pickling.
        function = remote_function._function
        function_name_global_valid = function.__name__ in function.__globals__
        function_name_global_value = function.__globals__.get(
            function.__name__)
        # Allow the function to reference itself as a global variable
        if not is_cython(function):
            function.__globals__[function.__name__] = remote_function
        try:
            pickled_function = pickle.dumps(function)
        finally:
            # Undo our changes
            if function_name_global_valid:
                function.__globals__[function.__name__] = (
                    function_name_global_value)
            else:
                del function.__globals__[function.__name__]

        check_oversized_pickle(pickled_function,
                               remote_function._function_name,
                               "remote function", self._worker)
        key = (b"RemoteFunction:" + self._worker.current_job_id.binary() +
               b":" +
               remote_function._function_descriptor.function_id.binary())
        self._worker.redis_client.hmset(
            key, {
                "job_id":
                self._worker.current_job_id.binary(),
                "function_id":
                remote_function._function_descriptor.function_id.binary(),
                "name":
                remote_function._function_name,
                "module":
                function.__module__,
                "function":
                pickled_function,
                "max_calls":
                remote_function._max_calls
            })
        self._worker.redis_client.rpush("Exports", key)
Example #20
 def _publish_actor_class_to_key(self, key, actor_class_info):
     """Push an actor class definition to Redis.
     This is factored out as a separate function because it is also called
     on cached actor class definitions when a worker connects for the first
     time.
     Args:
         key: The key to store the actor class info at.
         actor_class_info: Information about the actor class.
     """
     # We set the driver ID here because it may not have been available when
     # the actor class was defined.
     self._worker.gcs_client.internal_kv_put(key,
                                             pickle.dumps(actor_class_info),
                                             True,
                                             KV_NAMESPACE_FUNCTION_TABLE)
     self.export_key(key)
Example #21
    def _serialize_to_pickle5(self, metadata, value):
        writer = Pickle5Writer()
        # TODO(swang): Check that contained_object_refs is empty.
        try:
            self.set_in_band_serialization()
            inband = pickle.dumps(
                value, protocol=5, buffer_callback=writer.buffer_callback)
        except Exception as e:
            self.get_and_clear_contained_object_refs()
            raise e
        finally:
            self.set_out_of_band_serialization()

        return Pickle5SerializedObject(
            metadata, inband, writer,
            self.get_and_clear_contained_object_refs())
Example #22
 def run(self, mpi_func: Callable, timeout=None) -> Any:
     assert self.started
     func_request = network_pb2.Function(func_id=self.func_id,
                                         func=cloudpickle.dumps(mpi_func))
     with self.lock:
         self.func_result = FunctionResults(self.func_id, self.world_size)
     send = [meta.stub.RunFunction(func_request) for meta in self.workers]
     self.func_id += 1
     self.func_result.done.wait(timeout)
     with self.lock:
         if self.func_result:
             results = self.func_result.results
             assert len(results) == self.world_size, "function call failed"
             return self.func_result.results
         else:
             raise Exception("function call failed")
Example #23
File: server.py Project: tseiger1/ray
 def _schedule_method(
         self,
         task: ray_client_pb2.ClientTask,
         context=None,
         prepared_args=None) -> ray_client_pb2.ClientTaskTicket:
     actor_handle = self.actor_refs.get(task.payload_id)
     if actor_handle is None:
         raise Exception(
             "Can't run an actor the server doesn't have a handle for")
     arglist = _convert_args(task.args, prepared_args)
     with stash_api_for_tests(self._test_mode):
         output = getattr(actor_handle, task.name).remote(*arglist)
         self.object_refs[output.binary()] = output
         pickled_ref = cloudpickle.dumps(output)
     return ray_client_pb2.ClientTaskTicket(
         return_ref=make_remote_ref(output.binary(), pickled_ref))
Example #24
    def _checkpoint(self) -> None:
        """Checkpoint internal state and write it to the KV store."""
        assert self.write_lock.locked()
        logger.debug("Writing checkpoint")
        start = time.time()

        checkpoint = pickle.dumps(
            Checkpoint(self.configuration_store, self.actor_reconciler))

        self.kv_store.put(CHECKPOINT_KEY, checkpoint)
        logger.debug("Wrote checkpoint in {:.2f}".format(time.time() - start))

        if random.random(
        ) < _CRASH_AFTER_CHECKPOINT_PROBABILITY and self.detached:
            logger.warning("Intentionally crashing after checkpoint")
            os._exit(0)
Example #25
File: node.py Project: wuisawesome/ray
    def __init__(
        self,
        callable_factory: Callable[[], Callable],
        config: StepConfig,
        incoming_edges: Tuple[PipelineNode],
    ):
        # Serialize to make this class environment-independent.
        self._serialized_callable_factory: bytes = cloudpickle.dumps(
            callable_factory)
        self._config: StepConfig = config
        self._incoming_edges: Tuple[PipelineNode] = incoming_edges

        # Populated in .deploy().
        self._executor: Executor = None

        assert len(self._incoming_edges) > 0
Example #26
    def _checkpoint(self):
        """Checkpoint internal state and write it to the KV store."""
        assert self.write_lock.locked()
        logger.debug("Writing checkpoint")
        start = time.time()
        checkpoint = pickle.dumps(
            (self.routes, list(
                self.routers.keys()), self.backends, self.traffic_policies,
             self.replicas, self.replicas_to_start, self.replicas_to_stop,
             self.backends_to_remove, self.endpoints_to_remove))

        self.kv_store.put(CHECKPOINT_KEY, checkpoint)
        logger.debug("Wrote checkpoint in {:.2f}".format(time.time() - start))

        if random.random() < _CRASH_AFTER_CHECKPOINT_PROBABILITY:
            logger.warning("Intentionally crashing after checkpoint")
            os._exit(0)
Example #27
def test_actor_method_metadata_cache(ray_start_regular):
    class Actor(object):
        pass

    # The cache of ActorClassMethodMetadata.
    cache = ray.actor.ActorClassMethodMetadata._cache
    cache.clear()

    # Check cache hit during ActorHandle deserialization.
    A1 = ray.remote(Actor)
    a = A1.remote()
    assert len(cache) == 1
    cached_data_id = [id(x) for x in list(cache.items())[0]]
    for x in range(10):
        a = pickle.loads(pickle.dumps(a))
    assert len(ray.actor.ActorClassMethodMetadata._cache) == 1
    assert [id(x) for x in list(cache.items())[0]] == cached_data_id
Example #28
File: controller.py Project: paravatha/ray
    def _checkpoint(self) -> None:
        """Checkpoint internal state and write it to the KV store."""
        assert self.write_lock.locked()
        logger.debug("Writing checkpoint")
        start = time.time()

        checkpoint = pickle.dumps(
            Checkpoint(self.backend_state.checkpoint(),
                       self._serializable_inflight_results))

        self.kv_store.put(CHECKPOINT_KEY, checkpoint)
        logger.debug("Wrote checkpoint in {:.3f}s".format(time.time() - start))

        if random.random(
        ) < _CRASH_AFTER_CHECKPOINT_PROBABILITY and self.detached:
            logger.warning("Intentionally crashing after checkpoint")
            os._exit(0)
Example #29
File: server.py Project: haochihlin/ray
    def _async_get_object(
            self,
            request: ray_client_pb2.GetRequest,
            client_id: str,
            req_id: int,
            result_queue: queue.Queue,
            context=None) -> Optional[ray_client_pb2.GetResponse]:
        """Attempts to schedule a callback to push the GetResponse to the
        main loop when the desired object is ready. If there is some failure
        in scheduling, a GetResponse will be immediately returned.
        """
        refs = []
        for rid in request.ids:
            ref = self.object_refs[client_id].get(rid, None)
            if ref:
                refs.append(ref)
            else:
                return ray_client_pb2.GetResponse(valid=False)
        try:
            logger.debug("async get: %s" % refs)
            with disable_client_hook():

                def send_get_response(result: Any) -> None:
                    """Pushes a GetResponse to the main DataPath loop to send
                    to the client. This is called when the object is ready
                    on the server side."""
                    try:
                        serialized = dumps_from_server(result, client_id, self)
                        get_resp = ray_client_pb2.GetResponse(valid=True,
                                                              data=serialized)
                    except Exception as e:
                        get_resp = ray_client_pb2.GetResponse(
                            valid=False, error=cloudpickle.dumps(e))
                    resp = ray_client_pb2.DataResponse(get=get_resp,
                                                       req_id=req_id)
                    resp.req_id = req_id
                    result_queue.put(resp)

                for ref in refs:
                    ref._on_completed(send_get_response)
                return None

        except Exception as e:
            return ray_client_pb2.GetResponse(valid=False,
                                              error=cloudpickle.dumps(e))
Example #30
 def pickle_checkpoint(checkpoint_path: str):
     """Pickles checkpoint data."""
     checkpoint_dir = TrainableUtil.find_checkpoint_dir(checkpoint_path)
     data = {}
     for basedir, _, file_names in os.walk(checkpoint_dir):
         for file_name in file_names:
             path = os.path.join(basedir, file_name)
             with open(path, "rb") as f:
                 data[os.path.relpath(path, checkpoint_dir)] = f.read()
     # Use normpath so that a directory path isn't mapped to empty string.
     name = os.path.relpath(os.path.normpath(checkpoint_path),
                            checkpoint_dir)
     name += os.path.sep if os.path.isdir(checkpoint_path) else ""
     data_dict = pickle.dumps({
         "checkpoint_name": name,
         "data": data,
     })
     return data_dict
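A hypothetical counterpart that restores the pickled directory contents; this is a sketch assuming data_dict came from pickle_checkpoint above, and unpickle_checkpoint is not a Ray API:

import os
import pickle

def unpickle_checkpoint(data_dict, target_dir):
    # Recreate each file recorded in the checkpoint under target_dir and
    # return the path of the original checkpoint entry point.
    payload = pickle.loads(data_dict)
    for rel_path, contents in payload["data"].items():
        path = os.path.join(target_dir, rel_path)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "wb") as f:
            f.write(contents)
    return os.path.join(target_dir, payload["checkpoint_name"])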
Example #31
    def export_actor_class(self, Class, actor_creation_function_descriptor,
                           actor_method_names):
        if self._worker.load_code_from_local:
            module_name, class_name = (
                actor_creation_function_descriptor.module_name,
                actor_creation_function_descriptor.class_name)
            # If the class is dynamic, we still export it to GCS
            # even if load_code_from_local is set True.
            if self.load_function_or_class_from_local(module_name,
                                                      class_name) is not None:
                return

        # `current_job_id` shouldn't be NIL, unless:
        # 1) This worker isn't an actor;
        # 2) And a previous task started a background thread, which didn't
        #    finish before the task finished, and still uses Ray API
        #    after that.
        assert not self._worker.current_job_id.is_nil(), (
            "You might have started a background thread in a non-actor "
            "task, please make sure the thread finishes before the "
            "task finishes.")
        job_id = self._worker.current_job_id
        key = (b"ActorClass:" + job_id.binary() + b":" +
               actor_creation_function_descriptor.function_id.binary())
        actor_class_info = {
            "class_name":
            actor_creation_function_descriptor.class_name.split(".")[-1],
            "module":
            actor_creation_function_descriptor.module_name,
            "class":
            pickle.dumps(Class),
            "job_id":
            job_id.binary(),
            "collision_identifier":
            self.compute_collision_identifier(Class),
            "actor_method_names":
            json.dumps(list(actor_method_names))
        }

        check_oversized_function(actor_class_info["class"],
                                 actor_class_info["class_name"], "actor",
                                 self._worker)

        self._publish_actor_class_to_key(key, actor_class_info)
Example #32
def hash_bucket(annotated_delta_manifests: List[Dict[str, Any]],
                column_names: List[str],
                primary_keys: List[str],
                sort_keys: List[str],
                num_buckets: int,
                num_groups: int,
                deltacat_storage=unimplemented_deltacat_storage):

    logger.info(f"Starting hash bucket task...")
    hash_bucket_group_to_obj_id = np.empty([num_groups], dtype="object")

    delta_file_envelope_groups = group_file_records_by_pk_hash_bucket(
        annotated_delta_manifests,
        num_buckets,
        column_names,
        primary_keys,
        sort_keys,
        deltacat_storage,
    )
    if delta_file_envelope_groups is None:
        return hash_bucket_group_to_obj_id

    # write grouped output data to files including the group name
    hb_group_to_delta_file_envelopes = np.empty([num_groups], dtype="object")
    for hb_index in range(len(delta_file_envelope_groups)):
        delta_file_envelopes = delta_file_envelope_groups[hb_index]
        if delta_file_envelopes:
            hb_group = hb_index % num_groups
            if hb_group_to_delta_file_envelopes[hb_group] is None:
                hb_group_to_delta_file_envelopes[hb_group] = np.empty(
                    [num_buckets], dtype="object")
            hb_group_to_delta_file_envelopes[hb_group][hb_index] = \
                delta_file_envelopes

    object_refs = []
    for hb_group in range(len(hb_group_to_delta_file_envelopes)):
        delta_file_envelopes = hb_group_to_delta_file_envelopes[hb_group]
        if delta_file_envelopes is not None:
            obj_ref = ray.put(delta_file_envelopes)
            object_refs.append(obj_ref)
            hash_bucket_group_to_obj_id[hb_group] = cloudpickle.dumps(obj_ref)

    logger.info(f"Finished hash bucket task...")
    return hash_bucket_group_to_obj_id, object_refs
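Downstream consumption is roughly symmetric (an illustrative sketch, not the actual downstream task): each non-empty entry of hash_bucket_group_to_obj_id is a cloudpickled ObjectRef, so a later task can unpickle it and fetch the grouped envelopes.

for pickled_ref in hash_bucket_group_to_obj_id:
    if pickled_ref is not None:
        envelopes = ray.get(cloudpickle.loads(pickled_ref))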
Example #33
def send(signal):
    """Send signal.

    The signal has a unique identifier that is computed from (1) the id
    of the actor or task sending this signal (i.e., the actor or task calling
    this function), and (2) an index that is incremented every time this
    source sends a signal. This index starts from 1.

    Args:
        signal: Signal to be sent.
    """
    if ray.worker.global_worker.actor_id.is_nil():
        source_key = ray.worker.global_worker.current_task_id.hex()
    else:
        source_key = ray.worker.global_worker.actor_id.hex()

    encoded_signal = ray.utils.binary_to_hex(cloudpickle.dumps(signal))
    ray.worker.global_worker.redis_client.execute_command(
        "XADD " + source_key + " * signal " + encoded_signal)
Example #34
def _fetch_metadata_remotely(
    pieces: List["pyarrow._dataset.ParquetFileFragment"],
) -> List[ObjectRef["pyarrow.parquet.FileMetaData"]]:
    from ray import cloudpickle

    remote_fetch_metadata = cached_remote_fn(_fetch_metadata_serialization_wrapper)
    metas = []
    parallelism = min(len(pieces) // PIECES_PER_META_FETCH, 100)
    meta_fetch_bar = ProgressBar("Metadata Fetch Progress", total=parallelism)
    try:
        _register_parquet_file_fragment_serialization()
        for pcs in np.array_split(pieces, parallelism):
            if len(pcs) == 0:
                continue
            metas.append(remote_fetch_metadata.remote(cloudpickle.dumps(pcs)))
    finally:
        _deregister_parquet_file_fragment_serialization()
    metas = meta_fetch_bar.fetch_until_complete(metas)
    return list(itertools.chain.from_iterable(metas))
Example #35
    def _do_export(self, remote_function):
        """Pickle a remote function and export it to redis.

        Args:
            remote_function: the RemoteFunction object.
        """
        if self._worker.load_code_from_local:
            return
        # Work around limitations of Python pickling.
        function = remote_function._function
        function_name_global_valid = function.__name__ in function.__globals__
        function_name_global_value = function.__globals__.get(
            function.__name__)
        # Allow the function to reference itself as a global variable
        if not is_cython(function):
            function.__globals__[function.__name__] = remote_function
        try:
            pickled_function = pickle.dumps(function)
        finally:
            # Undo our changes
            if function_name_global_valid:
                function.__globals__[function.__name__] = (
                    function_name_global_value)
            else:
                del function.__globals__[function.__name__]

        check_oversized_pickle(pickled_function,
                               remote_function._function_name,
                               "remote function", self._worker)
        key = (b"RemoteFunction:" + self._worker.task_driver_id.binary() + b":"
               + remote_function._function_descriptor.function_id.binary())
        self._worker.redis_client.hmset(
            key, {
                "driver_id": self._worker.task_driver_id.binary(),
                "function_id": remote_function._function_descriptor.
                function_id.binary(),
                "name": remote_function._function_name,
                "module": function.__module__,
                "function": pickled_function,
                "max_calls": remote_function._max_calls
            })
        self._worker.redis_client.rpush("Exports", key)
Example #36
def send(signal):
    """Send signal.

    The signal has a unique identifier that is computed from (1) the id
    of the actor or task sending this signal (i.e., the actor or task calling
    this function), and (2) an index that is incremented every time this
    source sends a signal. This index starts from 1.

    Args:
        signal: Signal to be sent.
    """
    if hasattr(ray.worker.global_worker, "actor_creation_task_id"):
        source_key = ray.worker.global_worker.actor_id.hex()
    else:
        # No actors; this function must have been called from a task
        source_key = ray.worker.global_worker.current_task_id.hex()

    encoded_signal = ray.utils.binary_to_hex(cloudpickle.dumps(signal))
    ray.worker.global_worker.redis_client.execute_command(
        "XADD " + source_key + " * signal " + encoded_signal)
Example #37
    def __getstate__(self):
        """Memento generator for Trial.

        Sets RUNNING trials to PENDING, and flushes the result logger.
        Note this can only occur if the trial holds a DISK checkpoint.
        """
        assert self._checkpoint.storage == Checkpoint.DISK, (
            "Checkpoint must not be in-memory.")
        state = self.__dict__.copy()
        state["resources"] = resources_to_json(self.resources)

        for key in self._nonjson_fields:
            state[key] = binary_to_hex(cloudpickle.dumps(state.get(key)))

        state["runner"] = None
        state["result_logger"] = None
        if self.result_logger:
            self.result_logger.flush()
            state["__logger_started__"] = True
        else:
            state["__logger_started__"] = False
        return copy.deepcopy(state)
Example #38
    def export_actor_class(self, Class, actor_method_names):
        if self._worker.load_code_from_local:
            return
        function_descriptor = FunctionDescriptor.from_class(Class)
        # `task_driver_id` shouldn't be NIL, unless:
        # 1) This worker isn't an actor;
        # 2) And a previous task started a background thread, which didn't
        #    finish before the task finished, and still uses Ray API
        #    after that.
        assert not self._worker.task_driver_id.is_nil(), (
            "You might have started a background thread in a non-actor task, "
            "please make sure the thread finishes before the task finishes.")
        driver_id = self._worker.task_driver_id
        key = (b"ActorClass:" + driver_id.binary() + b":" +
               function_descriptor.function_id.binary())
        actor_class_info = {
            "class_name": Class.__name__,
            "module": Class.__module__,
            "class": pickle.dumps(Class),
            "driver_id": driver_id.binary(),
            "actor_method_names": json.dumps(list(actor_method_names))
        }

        check_oversized_pickle(actor_class_info["class"],
                               actor_class_info["class_name"], "actor",
                               self._worker)

        if self._worker.mode is None:
            # This means that 'ray.init()' has not been called yet and so we
            # must cache the actor class definition and export it when
            # 'ray.init()' is called.
            assert self._actors_to_export is not None
            self._actors_to_export.append((key, actor_class_info))
            # This caching code path is currently not used because we only
            # export actor class definitions lazily when we instantiate the
            # actor for the first time.
            assert False, "This should be unreachable."
        else:
            self._publish_actor_class_to_key(key, actor_class_info)
Example #39
def register_actor(name, actor_handle):
    """Register a named actor under a string key.

    Args:
        name: The name of the named actor.
        actor_handle: The actor object to be associated with this name.
    """
    if not isinstance(name, str):
        raise TypeError("The name argument must be a string.")
    if not isinstance(actor_handle, ray.actor.ActorHandle):
        raise TypeError("The actor_handle argument must be an ActorHandle "
                        "object.")
    actor_name = _calculate_key(name)
    pickled_state = pickle.dumps(actor_handle)

    # Add the actor to Redis if it does not already exist.
    already_exists = _internal_kv_put(actor_name, pickled_state)
    if already_exists:
        # If the registration fails, then erase the new actor handle that
        # was added when pickling the actor handle.
        actor_handle._ray_new_actor_handles.pop()
        raise ValueError(
            "Error: the actor with name={} already exists".format(name))
Example #40
 def serialize(self):
     return pickle.dumps(self)
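A minimal round-trip sketch for the one-liner above, using a hypothetical Config class that defines the same method:

import pickle

class Config:
    def __init__(self, lr):
        self.lr = lr

    def serialize(self):
        return pickle.dumps(self)

restored = pickle.loads(Config(lr=0.01).serialize())
assert restored.lr == 0.01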