Python get_runtime_context Exemples, ray.get_runtime_context Python Exemples

Exemple #1

0

Afficher le fichier

    def _put_serve_snapshot(self) -> None:
        """Write a JSON snapshot of all deployments to the snapshot store.

        One entry is built per deployment returned by
        ``list_deployments_internal(include_deleted=True)``. Entries whose
        status is "RUNNING" additionally list their running replicas, keyed
        by the hex actor id. The mapping is JSON-encoded as UTF-8 and stored
        under ``SNAPSHOT_KEY``.
        """
        snapshot = {}
        deployments = self.list_deployments_internal(include_deleted=True)
        for deployment_name, (info, route_prefix) in deployments.items():
            entry = {
                "name": deployment_name,
                "namespace": ray.get_runtime_context().namespace,
                "ray_job_id": info.deployer_job_id.hex(),
                "class_name": info.replica_config.deployment_def_name,
                "version": info.version,
                "http_route": route_prefix,
                "start_time": info.start_time_ms,
                "end_time": info.end_time_ms or 0,
                # A truthy end time marks the deployment as deleted.
                "status": "DELETED" if info.end_time_ms else "RUNNING",
                "actors": {},
            }
            if entry["status"] == "RUNNING":
                state = self.deployment_state_manager._deployment_states[
                    deployment_name]
                for replica in state._replicas.get([ReplicaState.RUNNING]):
                    try:
                        handle = replica.actor_handle
                    except ValueError:
                        # Actor died or hasn't yet been created.
                        continue
                    actor_id = handle._ray_actor_id.hex()
                    tag = replica.replica_tag
                    version = replica.version
                    # Replicas without a usable version report None.
                    if version is None or version.unversioned:
                        code_version = None
                    else:
                        code_version = version.code_version
                    entry["actors"][actor_id] = {
                        "replica_tag": tag,
                        "version": code_version,
                    }

            snapshot[deployment_name] = entry

        payload = json.dumps(snapshot).encode("utf-8")
        self.snapshot_store.put(SNAPSHOT_KEY, payload)

Exemple #2

0

Afficher le fichier

    async def __init__(
        self,
        controller_name: str,
        http_config: HTTPOptions,
        checkpoint_path: str,
        detached: bool = False,
    ):
        """Set up the controller's stores and state managers and start its loop.

        Args:
            controller_name: Stored on the instance, combined with the current
                namespace to name the KV stores, and passed to ``HTTPState``
                and ``DeploymentStateManager``.
            http_config: Passed to ``HTTPState``.
            checkpoint_path: Stored on the instance and passed to
                ``make_kv_store``.
            detached: Passed to ``HTTPState`` and ``DeploymentStateManager``.

        NOTE(review): the constructor is declared ``async``; presumably the
        class is instantiated by a framework that supports async constructors
        (e.g. as a Ray actor) -- confirm against the class definition.
        """
        # Used to read/write checkpoints.
        self.controller_namespace = ray.get_runtime_context().namespace
        self.controller_name = controller_name
        self.checkpoint_path = checkpoint_path
        # Both stores share one namespace derived from the controller's name
        # and the namespace it is running in.
        kv_store_namespace = f"{self.controller_name}-{self.controller_namespace}"
        self.kv_store = make_kv_store(checkpoint_path, namespace=kv_store_namespace)
        self.snapshot_store = RayInternalKVStore(namespace=kv_store_namespace)

        # Dictionary of deployment_name -> proxy_name -> queue length.
        self.deployment_stats = defaultdict(lambda: defaultdict(dict))

        # Used to ensure that only a single state-changing operation happens
        # at any given time.
        self.write_lock = asyncio.Lock()

        self.long_poll_host = LongPollHost()

        self.http_state = HTTPState(controller_name, detached, http_config)
        self.endpoint_state = EndpointState(self.kv_store, self.long_poll_host)
        # Fetch all running actors in current cluster as source of current
        # replica state for controller failure recovery
        all_current_actor_names = ray.util.list_named_actors()
        self.deployment_state_manager = DeploymentStateManager(
            controller_name,
            detached,
            self.kv_store,
            self.long_poll_host,
            all_current_actor_names,
        )

        # TODO(simon): move autoscaling related stuff into a manager.
        self.autoscaling_metrics_store = InMemoryMetricsStore()

        # Schedule the control loop as a task on the current event loop; the
        # constructor does not wait for it.
        asyncio.get_event_loop().create_task(self.run_control_loop())

Exemple #3

0

Afficher le fichier

Fichier : test_standalone.py Projet : marload/ray

def test_serve_controller_namespace(ray_shutdown, namespace: Optional[str],
                                    detached: bool):
    """
    Check which namespace the serve controller actor is started in.

    Expected namespace, in priority order: the explicit ``namespace`` if one
    was given, "serve" when started detached without a namespace, otherwise
    the namespace reported by the current runtime context. The controller
    must be retrievable by name from that namespace.
    """

    ray.init(namespace=namespace)
    serve.start(detached=detached)
    client = serve.api._global_client

    # ``or`` keeps the fallback lazy: the runtime context is only consulted
    # when no namespace was passed and serve is not detached.
    expected_namespace = namespace or (
        "serve" if detached else ray.get_runtime_context().namespace)

    controller = ray.get_actor(
        client._controller_name, namespace=expected_namespace)
    assert controller

Exemple #4

0

Afficher le fichier

Fichier : rpdb.py Projet : ddworak94/ray

def connect_ray_pdb(host=None,
                    port=None,
                    patch_stdstreams=False,
                    quiet=None,
                    breakpoint_uuid=None):
    """
    Start a remote PDB session and advertise it in the internal KV store.

    Arguments left as ``None`` fall back to environment variables:
    ``REMOTE_PDB_HOST`` (default "127.0.0.1"), ``REMOTE_PDB_PORT``
    (default "0") and ``REMOTE_PDB_QUIET`` (any non-empty value means
    quiet). A random uuid is generated when ``breakpoint_uuid`` is falsy.

    Metadata describing the breakpoint is stored under the key
    ``RAY_PDB_<breakpoint_uuid>`` before ``rdb.listen()`` is called and is
    deleted once that call returns.

    Returns:
        The ``RemotePdb`` instance that was created.
    """
    env = os.environ
    if host is None:
        host = env.get("REMOTE_PDB_HOST", "127.0.0.1")
    if port is None:
        port = int(env.get("REMOTE_PDB_PORT", "0"))
    if quiet is None:
        quiet = bool(env.get("REMOTE_PDB_QUIET", ""))
    if not breakpoint_uuid:
        breakpoint_uuid = uuid.uuid4().hex

    rdb = RemotePdb(
        breakpoint_uuid=breakpoint_uuid,
        host=host,
        port=port,
        patch_stdstreams=patch_stdstreams,
        quiet=quiet)

    # Report the address the socket is actually bound to, not the one
    # requested above.
    bound = rdb._listen_socket.getsockname()
    pdb_address = "{}:{}".format(bound[0], bound[1])

    # Index 2 is the frame two levels above this function. This lookup must
    # stay inline here: moving it into a helper would shift the frame depth.
    caller = inspect.getouterframes(inspect.currentframe())[2]

    kv_key = "RAY_PDB_{}".format(breakpoint_uuid)
    data = {
        "proctitle": setproctitle.getproctitle(),
        "pdb_address": pdb_address,
        "filename": caller.filename,
        "lineno": caller.lineno,
        "traceback": "\n".join(traceback.format_exception(*sys.exc_info())),
        "timestamp": time.time(),
        "job_id": ray.get_runtime_context().job_id.hex(),
    }
    _internal_kv_put(kv_key, json.dumps(data), overwrite=True)
    rdb.listen()
    _internal_kv_del(kv_key)

    return rdb

Exemple #5

0

Afficher le fichier

def resume_async(workflow_id: str) -> ray.ObjectRef:
    """Resume a workflow without blocking on its result.

    The workflow is reconstructed through the workflow management actor and
    then executed. An incomplete workflow is re-executed from its
    checkpointed outputs; a completed one returns its result immediately.

    Examples:
        >>> from ray import workflow
        >>> start_trip = ... # doctest: +SKIP
        >>> trip = start_trip.step() # doctest: +SKIP
        >>> res1 = trip.run_async(workflow_id="trip1") # doctest: +SKIP
        >>> res2 = workflow.resume("trip1") # doctest: +SKIP
        >>> assert ray.get(res1) == ray.get(res2) # doctest: +SKIP

    Args:
        workflow_id: The id of the workflow to resume.

    Returns:
        An object reference that can be used to retrieve the workflow result.

    Raises:
        RuntimeError: If the management actor reports the workflow as
            non-terminating (already running or pending).
    """
    _ensure_workflow_initialized()
    logger.info(f'Resuming workflow [id="{workflow_id}"].')

    manager = workflow_access.get_management_actor()
    still_active = ray.get(
        manager.is_workflow_non_terminating.remote(workflow_id))
    if still_active:
        raise RuntimeError(
            f"Workflow '{workflow_id}' is already running or pending.")

    job_id = ray.get_runtime_context().job_id.hex()
    context = workflow_context.WorkflowStepContext(workflow_id=workflow_id)

    # Wait for reconstruction to finish before asking the manager to
    # execute; only the execution reference is handed back to the caller.
    ray.get(manager.reconstruct_workflow.remote(job_id, context))
    output_ref = manager.execute_workflow.remote(job_id, context)
    logger.info(f"Workflow job {workflow_id} resumed.")
    return output_ref

Exemple #6

0

Afficher le fichier

 def _put_serve_snapshot(self) -> None:
     """Write a JSON snapshot of the listed deployments to the KV store.

     Every deployment from ``list_deployments()`` is recorded with status
     "RUNNING" and zeroed start/end times. The mapping is JSON-encoded as
     UTF-8 and stored under ``SNAPSHOT_KEY``.
     """
     snapshot = {}
     for deployment_name, (info,
                           route_prefix) in self.list_deployments().items():
         job_id = info.deployer_job_id
         snapshot[deployment_name] = {
             "name": deployment_name,
             "namespace": ray.get_runtime_context().namespace,
             # A missing job id is recorded as the literal string "None".
             "ray_job_id": "None" if job_id is None else job_id.hex(),
             "class_name": info.replica_config.func_or_class_name,
             "version": info.version or "Unversioned",
             # TODO(architkulkarni): When we add the feature to allow
             # deployments with no HTTP route, update the below line.
             # Or refactor the route_prefix logic in the Deployment class now.
             "http_route": route_prefix or f"/{deployment_name}",
             "status": "RUNNING",
             "start_time": 0,
             "end_time": 0,
         }
     encoded = json.dumps(snapshot).encode("utf-8")
     self.kv_store.put(SNAPSHOT_KEY, encoded)

Exemple #7

0

Afficher le fichier

 def ClusterInfo(self,
                 request,
                 context=None) -> ray_client_pb2.ClusterInfoResponse:
     """Build a ``ClusterInfoResponse`` for the requested info type.

     * ``CLUSTER_RESOURCES`` / ``AVAILABLE_RESOURCES``: fill the resource
       table from ``ray.cluster_resources()`` / ``ray.available_resources()``
       with every value converted to ``float``.
     * ``RUNTIME_CONTEXT``: copy job id, node id, namespace, the
       capture-child-tasks flag and the JSON-encoded runtime env from
       ``ray.get_runtime_context()``.
     * Anything else: set ``json`` from ``_return_debug_cluster_info``.

     Every Ray call is made inside ``disable_client_hook()``.
     """
     info_type = ray_client_pb2.ClusterInfoType
     response = ray_client_pb2.ClusterInfoResponse()
     response.type = request.type

     if request.type in (info_type.CLUSTER_RESOURCES,
                         info_type.AVAILABLE_RESOURCES):
         with disable_client_hook():
             if request.type == info_type.CLUSTER_RESOURCES:
                 resources = ray.cluster_resources()
             else:
                 resources = ray.available_resources()
         # Normalize to floats; the Ray calls may return int values.
         as_floats = {
             name: float(amount)
             for name, amount in resources.items()
         }
         table = ray_client_pb2.ClusterInfoResponse.ResourceTable(
             table=as_floats)
         response.resource_table.CopyFrom(table)
     elif request.type == info_type.RUNTIME_CONTEXT:
         runtime_ctx = ray_client_pb2.ClusterInfoResponse.RuntimeContext()
         with disable_client_hook():
             rtc = ray.get_runtime_context()
             runtime_ctx.job_id = rtc.job_id.binary()
             runtime_ctx.node_id = rtc.node_id.binary()
             runtime_ctx.namespace = rtc.namespace
             runtime_ctx.capture_client_tasks = (
                 rtc.should_capture_child_tasks_in_placement_group)
             runtime_ctx.runtime_env = json.dumps(rtc.runtime_env)
         response.runtime_context.CopyFrom(runtime_ctx)
     else:
         with disable_client_hook():
             response.json = self._return_debug_cluster_info(
                 request, context)
     return response

Exemple #8

0

Afficher le fichier

Fichier : controller.py Projet : parasj/ray

 def __init__(
     self,
     controller_name: str,
     checkpoint_path: str,
     detached: bool = False,
     dedicated_cpu: bool = False,
     http_proxy_port: int = 8000,
 ):
     """Attach to the named controller actor, creating it when not found.

     The actor is looked up by name in the "serve" namespace. If the lookup
     raises ``ValueError`` (or yields ``None``), a new ``ServeController``
     is started in that namespace.

     Args:
         controller_name: Actor name to look up or create.
         checkpoint_path: Forwarded to the new controller.
         detached: When true the new actor gets ``lifetime="detached"``;
             also forwarded to the controller.
         dedicated_cpu: When true the new actor reserves one CPU, else zero.
         http_proxy_port: Port set on the ``HTTPOptions`` given to the new
             controller.
     """
     try:
         existing = ray.get_actor(controller_name, namespace="serve")
     except ValueError:
         existing = None
     self._controller = existing
     if existing is not None:
         return

     # The node running this code is used as the head node id for
     # scheduling -- presumably this runs on the head node; confirm.
     head_node_id = ray.get_runtime_context().node_id.hex()
     http_config = HTTPOptions()
     http_config.port = http_proxy_port

     # Soft affinity: the head node is preferred, but the controller may
     # still be restarted on another node in an HA cluster.
     placement = NodeAffinitySchedulingStrategy(head_node_id, soft=True)
     controller_cls = ServeController.options(
         num_cpus=1 if dedicated_cpu else 0,
         name=controller_name,
         lifetime="detached" if detached else None,
         max_restarts=-1,
         max_task_retries=-1,
         scheduling_strategy=placement,
         namespace="serve",
         max_concurrency=CONTROLLER_MAX_CONCURRENCY,
     )
     self._controller = controller_cls.remote(
         controller_name,
         http_config=http_config,
         checkpoint_path=checkpoint_path,
         head_node_id=head_node_id,
         detached=detached,
     )

Exemple #9

0

Afficher le fichier

Fichier : client.py Projet : tchordia/ray

def get_controller_namespace(
        detached: bool, _override_controller_namespace: Optional[str] = None):
    """Return the namespace the serve controller should live in.

    Resolution order:
      1. ``_override_controller_namespace`` when it is not ``None``.
      2. "serve" when ``detached`` is true and the current namespace fully
         matches ``ANONYMOUS_NAMESPACE_PATTERN``.
      3. The namespace reported by the current runtime context.

    Args:
        detached (bool): Whether serve.start() was called with detached=True
        _override_controller_namespace (Optional[str]): When set, this is the
            controller's namespace
    """
    if _override_controller_namespace is not None:
        return _override_controller_namespace

    current_namespace = ray.get_runtime_context().namespace

    # The pattern is only consulted for detached controllers.
    if detached and ANONYMOUS_NAMESPACE_PATTERN.fullmatch(
            current_namespace) is not None:
        return "serve"
    return current_namespace

Exemple #10

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 def wait_signal(self):
     """Wait for ``signal.wait`` to finish, then return the actor call stats."""
     pending = signal.wait.remote()
     ray.get(pending)
     context = ray.get_runtime_context()
     return context._get_actor_call_stats()

Exemple #11

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 def __init__(self):
     """Record the runtime context's ``was_current_actor_reconstructed`` flag."""
     context = ray.get_runtime_context()
     self._was_reconstructed = context.was_current_actor_reconstructed

Exemple #12

0

Afficher le fichier

 def ping(self):
     """Return the hex form of the node id from the current runtime context."""
     node_id = ray.get_runtime_context().node_id
     return node_id.hex()

Exemple #13

0

Afficher le fichier

        def check_and_get_node_id(self):
            """Call ``test_module.one()`` and return the current node id."""
            # Imported at call time, so the import is resolved wherever this
            # method actually executes.
            import test_module

            test_module.one()
            context = ray.get_runtime_context()
            return context.node_id

Exemple #14

0

Afficher le fichier

Fichier : per_task_runtime_env.py Projet : vishalbelsare/ray

    def get_task_working_dir():
        """Return ``working_dir()`` of the current runtime env.

        First asserts that "per_task_runtime_env.py" exists relative to the
        current working directory.
        """
        # The cwd is expected to contain this file, which is used as the
        # job entrypoint script.
        entrypoint_present = os.path.exists("per_task_runtime_env.py")
        assert entrypoint_present

        runtime_env = ray.get_runtime_context().runtime_env
        return runtime_env.working_dir()

Exemple #15

0

Afficher le fichier

Fichier : test_http_routes.py Projet : wuisawesome/ray

 def h():
     """Pass the current actor handle to ``intentional_kill`` and then stall."""
     this_actor = ray.get_runtime_context().current_actor
     kill_ref = intentional_kill.remote(this_actor)
     ray.get(kill_ref)
     # Sleep instead of returning, to leave time for the actor to exit.
     time.sleep(100)

Exemple #16

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 def task(node_id, job_id):
     """Assert the runtime context dict matches the given node and job ids.

     Also asserts that a task id is present and that no "actor_id" key is.
     """
     ctx = ray.get_runtime_context().get()
     assert ctx["node_id"] == node_id
     assert ctx["job_id"] == job_id
     assert ctx["task_id"] is not None
     assert "actor_id" not in ctx

Exemple #17

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 def current_job_id(self):
     """Return the job id reported by the current runtime context."""
     context = ray.get_runtime_context()
     return context.job_id

Exemple #18

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 async def func(self):
     """Await ``signal.wait``, then return the actor call stats."""
     pending = signal.wait.remote()
     await pending
     context = ray.get_runtime_context()
     return context._get_actor_call_stats()

Exemple #19

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 def update_was_reconstructed(self):
     """Return the runtime context's ``was_current_actor_reconstructed`` flag."""
     context = ray.get_runtime_context()
     return context.was_current_actor_reconstructed

Exemple #20

0

Afficher le fichier

 def get_env():
     """Return the ``runtime_env`` of the current runtime context."""
     context = ray.get_runtime_context()
     return context.runtime_env

Exemple #21

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 def current_actor_id(self):
     """Return the actor id reported by the current runtime context."""
     context = ray.get_runtime_context()
     return context.actor_id

Exemple #22

0

Afficher le fichier

 def get(self):
     """Return the ``runtime_env`` of the current runtime context."""
     context = ray.get_runtime_context()
     return context.runtime_env

Exemple #23

0

Afficher le fichier

 def get_id(self):
     """Return the hex form of the actor id from the current runtime context."""
     actor_id = ray.get_runtime_context().actor_id
     return actor_id.hex()

Exemple #24

0

Afficher le fichier

 def get_node_id():
     """Return the node id reported by the current runtime context."""
     context = ray.get_runtime_context()
     return context.node_id

Exemple #25

0

Afficher le fichier

 def get_address(self):
     """Return the node IP address via the API matching ``self.ray_version``.

     Versions at or above 1.0.0 read it from the runtime context's worker;
     older versions call ``ray.services.get_node_ip_address()``.
     """
     has_runtime_context = self.ray_version >= StrictVersion('1.0.0')
     if not has_runtime_context:
         return ray.services.get_node_ip_address()
     return ray.get_runtime_context().worker.node_ip_address

Exemple #26

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 def echo(self, s):
     """Invoke ``echo2`` remotely on the current actor and return its result ref."""
     context = ray.get_runtime_context()
     return context.current_actor.echo2.remote(s)

Exemple #27

0

Afficher le fichier

Fichier : test_namespace.py Projet : vishalbelsare/ray

def test_runtime_context(shutdown_only):
    """The namespace passed to ``ray.init`` is exposed by the runtime context.

    Checked both through the ``namespace`` attribute and the "namespace" key
    of the dict returned by ``get()``.
    """
    ray.init(namespace="abc")

    via_attribute = ray.get_runtime_context().namespace
    assert via_attribute == "abc"

    via_dict = ray.get_runtime_context().get()["namespace"]
    assert via_attribute == via_dict

Exemple #28

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 def func():
     """Return the actor call stats from the current runtime context."""
     context = ray.get_runtime_context()
     return context._get_actor_call_stats()

Exemple #29

0

Afficher le fichier

    def deploy(
            self,
            name: str,
            deployment_def: Union[Callable, Type[Callable], str],
            init_args: Tuple[Any],
            init_kwargs: Dict[Any, Any],
            ray_actor_options: Optional[Dict] = None,
            config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
            version: Optional[str] = None,
            prev_version: Optional[str] = None,
            route_prefix: Optional[str] = None,
            url: Optional[str] = None,
            _blocking: Optional[bool] = True) -> Optional[GoalId]:
        """Send a deployment to the controller and optionally wait for it.

        Args:
            name: Deployment name; forwarded to the controller and used in
                log messages.
            deployment_def: Passed to ``ReplicaConfig``.
            init_args: Passed to ``ReplicaConfig``.
            init_kwargs: Passed to ``ReplicaConfig``.
            ray_actor_options: Actor options, modified in place: a missing
                "runtime_env" is set to the current job's runtime env; an
                existing one gets "working_dir" defaulted from it.
            config: A ``DeploymentConfig`` or a dict parsed with
                ``DeploymentConfig.parse_obj``; ``None`` means ``{}``.
            version: Forwarded to the controller; shown in log messages.
            prev_version: Forwarded to the controller.
            route_prefix: Forwarded to the controller.
            url: Only used in the "is ready" log message.
            _blocking: When truthy, wait for the returned goal.

        Returns:
            The goal id when ``_blocking`` is falsy, otherwise ``None``.

        Raises:
            TypeError: If ``config`` is neither a dict nor a
                ``DeploymentConfig``.
        """
        config = {} if config is None else config
        ray_actor_options = ({} if ray_actor_options is None else
                             ray_actor_options)

        job_runtime_env = ray.get_runtime_context().runtime_env
        if "runtime_env" not in ray_actor_options:
            ray_actor_options["runtime_env"] = job_runtime_env
        else:
            ray_actor_options["runtime_env"].setdefault(
                "working_dir", job_runtime_env.get("working_dir"))

        replica_config = ReplicaConfig(
            deployment_def,
            init_args=init_args,
            init_kwargs=init_kwargs,
            ray_actor_options=ray_actor_options)

        if not isinstance(config, (dict, DeploymentConfig)):
            raise TypeError(
                "config must be a DeploymentConfig or a dictionary.")
        if isinstance(config, dict):
            deployment_config = DeploymentConfig.parse_obj(config)
        else:
            deployment_config = config

        autoscaling = deployment_config.autoscaling_config
        if (autoscaling is not None
                and deployment_config.max_concurrent_queries <
                autoscaling.target_num_ongoing_requests_per_replica):
            logger.warning("Autoscaling will never happen, "
                           "because 'max_concurrent_queries' is less than "
                           "'target_num_ongoing_requests_per_replica' now.")

        deploy_ref = self._controller.deploy.remote(
            name,
            deployment_config.to_proto_bytes(),
            replica_config,
            version,
            prev_version,
            route_prefix,
            ray.get_runtime_context().job_id,
        )
        goal_id, updating = ray.get(deploy_ref)

        tag = f"component=serve deployment={name}"

        if not updating:
            logger.info(f"Deployment '{name}' is already at version "
                        f"'{version}', not updating. {tag}")
        else:
            msg = f"Updating deployment '{name}'"
            if version is not None:
                msg += f" to version '{version}'"
            logger.info(f"{msg}. {tag}")

        # Non-blocking callers get the goal id and wait on it themselves.
        if not _blocking:
            return goal_id

        self._wait_for_goal(goal_id)

        url_part = f" at `{url}`" if url is not None else ""
        logger.info(
            f"Deployment '{name}{':'+version if version else ''}' is ready"
            f"{url_part}. {tag}")

Exemple #30

0

Afficher le fichier

Fichier : test_runtime_context.py Projet : rlan/ray

 def run(self):
     """Return the actor call stats from the current runtime context."""
     context = ray.get_runtime_context()
     return context._get_actor_call_stats()