Exemple #1
0
 async def set_fetch_memory_info(self, req) -> aiohttp.web.Response:
     """Switch memory-info collection on or off.

     Reads the required ``shouldFetch`` query value. ``"true"`` and
     ``"false"`` set ``self._collect_memory_info`` accordingly; any other
     value leaves the flag untouched and produces a failure response.
     """
     should_fetch = req.query["shouldFetch"]
     flag_for_text = {"true": True, "false": False}

     # Reject anything that is not one of the two accepted spellings.
     if should_fetch not in flag_for_text:
         return dashboard_utils.rest_response(
             success=False,
             message=f"Unknown argument to set_fetch {should_fetch}")

     self._collect_memory_info = flag_for_text[should_fetch]
     return dashboard_utils.rest_response(
         success=True,
         message=f"Successfully set fetching to {should_fetch}")
Exemple #2
0
 async def get_job(self, req) -> aiohttp.web.Response:
     """Return the detail record of one job.

     The job id comes from ``req.match_info`` and an optional ``view``
     comes from the query string. Only the default (absent) view is
     handled; any explicit view yields a failure response.
     """
     job_id = req.match_info.get("job_id")
     view = req.query.get("view")

     # No named views are supported, so bail out early when one is given.
     if view is not None:
         return dashboard_utils.rest_response(
             success=False, message="Unknown view {}".format(view))

     job_detail = {}
     job_detail["jobInfo"] = DataSource.jobs.get(job_id, {})
     job_detail["jobActors"] = DataSource.job_actors.get(job_id, {})
     job_detail["jobWorkers"] = DataSource.job_workers.get(job_id, [])

     # Let signal subscribers see the detail before it is sent.
     await GlobalSignals.job_info_fetched.send(job_detail)
     return dashboard_utils.rest_response(
         success=True, message="Job detail fetched.", detail=job_detail)
Exemple #3
0
    async def get_event(self, req) -> aiohttp.web.Response:
        """Return events for one job, or for every job.

        When the ``job_id`` query value is present, only that job's events
        are returned (an empty list if the job is unknown). Otherwise the
        response maps every job id in ``DataSource.events`` to its events.
        """
        job_id = req.query.get("job_id")

        if job_id is not None:
            events_of_job = DataSource.events.get(job_id, {})
            return dashboard_utils.rest_response(
                success=True,
                message="Job events fetched.",
                job_id=job_id,
                events=list(events_of_job.values()))

        # No job requested: flatten each job's event mapping into a list.
        all_events = {}
        for each_job_id, events_of_job in DataSource.events.items():
            all_events[each_job_id] = list(events_of_job.values())
        return dashboard_utils.rest_response(
            success=True, message="All events fetched.", events=all_events)
Exemple #4
0
 async def get_errors(self, req) -> aiohttp.web.Response:
     """Return the errors recorded for a node, optionally for one pid.

     Query values:
         ip: required; selects the node in
             ``DataSource.ip_and_pid_to_errors``.
         pid: optional; when given, only that pid's errors are returned
             (an empty list if the pid has none).

     An ``ip`` with no recorded errors yields an empty result instead of
     raising ``KeyError``.
     """
     ip = req.query["ip"]
     pid = req.query.get("pid")
     # Fall back to an empty mapping so unknown IPs do not crash the handler.
     node_errors = DataSource.ip_and_pid_to_errors.get(ip, {})
     filtered_errs = node_errors.get(pid, []) if pid else node_errors
     return dashboard_utils.rest_response(
         success=True, message="Fetched errors.", errors=filtered_errs)
Exemple #5
0
 async def get_logs(self, req) -> aiohttp.web.Response:
     """Return the logs recorded for a node, optionally for one pid.

     Query values:
         ip: required; selects the node in
             ``DataSource.ip_and_pid_to_logs``.
         pid: optional; when given, only that pid's logs are returned
             (an empty list if the pid has none).

     An ``ip`` with no recorded logs yields an empty result instead of
     raising ``KeyError``.
     """
     ip = req.query["ip"]
     pid = req.query.get("pid")
     try:
         node_logs = DataSource.ip_and_pid_to_logs[ip]
     except KeyError:
         # Unknown node: report "no logs" rather than failing the request.
         node_logs = {}
     payload = node_logs.get(pid, []) if pid else node_logs
     return dashboard_utils.rest_response(
         success=True, message="Fetched logs.", logs=payload)
Exemple #6
0
    async def get_cluster_status(self, req):
        """Returns status information about the cluster.

        The response carries three fields:
            autoscaling_status (str): the decoded legacy status value, or
                None when it is not set.
            autoscaling_error (str): the decoded autoscaler error value,
                or None when it is not set.
            cluster_status: the JSON-decoded formatted status, or None
                when it is missing or empty.

        All values are read through the dashboard head's aioredis client;
        it's expected that the autoscaler writes them there.
        """
        redis_client = self._dashboard_head.aioredis_client

        raw_legacy = await redis_client.hget(DEBUG_AUTOSCALING_STATUS_LEGACY,
                                             "value")
        raw_formatted = await redis_client.hget(DEBUG_AUTOSCALING_STATUS,
                                                "value")
        if raw_formatted:
            cluster_status = json.loads(raw_formatted.decode())
        else:
            cluster_status = {}
        raw_error = await redis_client.hget(DEBUG_AUTOSCALING_ERROR, "value")

        # Missing values are reported as None rather than empty bytes.
        autoscaling_status = raw_legacy.decode() if raw_legacy else None
        autoscaling_error = raw_error.decode() if raw_error else None
        return dashboard_utils.rest_response(
            success=True,
            message="Got cluster status.",
            autoscaling_status=autoscaling_status,
            autoscaling_error=autoscaling_error,
            cluster_status=cluster_status or None,
        )
 async def get_errors(self, req) -> aiohttp.web.Response:
     """Return a node's errors keyed by pid, optionally narrowed to one pid.

     ``ip`` is a required query value and ``pid`` is optional. Without a
     pid the node's whole error mapping is returned; with one, the result
     is a single-entry mapping from that pid to its errors (an empty list
     if none are recorded). Unknown IPs produce an empty mapping.
     """
     ip = req.query["ip"]
     pid = str(req.query.get("pid", ""))
     errors_by_pid = DataSource.ip_and_pid_to_errors.get(ip, {})

     if not pid:
         return dashboard_utils.rest_response(
             success=True, message="Fetched errors.", errors=errors_by_pid)

     # Keep the response shape a mapping even when a single pid is requested.
     only_requested = {pid: errors_by_pid.get(pid, [])}
     return dashboard_utils.rest_response(
         success=True, message="Fetched errors.", errors=only_requested)
Exemple #8
0
 async def get_availability(self, req) -> aiohttp.web.Response:
     """Report whether ``Analysis`` is set and whether trials are available.

     ``available`` is true when the module-level ``Analysis`` name is not
     None; ``trials_available`` mirrors ``self._trials_available``.
     """
     analysis_loaded = Analysis is not None
     availability = dict(
         available=analysis_loaded,
         trials_available=self._trials_available,
     )
     return rest_response(
         success=True,
         message="Fetched tune availability",
         result=availability,
     )
Exemple #9
0
 async def snapshot(self, req):
     """Return a snapshot combining job info and actor info.

     Job data is fetched first, then actor data; both are nested under
     the ``snapshot`` field of the response.
     """
     jobs = await self.get_job_info()
     actors = await self.get_actor_info()
     return dashboard_utils.rest_response(
         success=True,
         message="hello",
         snapshot={"jobs": jobs, "actors": actors},
     )
Exemple #10
0
 async def dump(self, req) -> aiohttp.web.Response:
     """Dump the public attributes of ``DataSource``.

     Without a ``key`` query value, every ``DataSource`` class attribute
     whose name does not start with an underscore is copied into a plain
     dict and returned under its own name. With a ``key``, only that
     attribute is returned.

     A ``key`` that is unknown or starts with an underscore produces a
     failure response; previously it reached ``dict(None)`` (or a class
     internal) and raised.
     """
     public_attrs = {
         name: value
         for name, value in DataSource.__dict__.items()
         if not name.startswith("_")
     }
     key = req.query.get("key")

     if key is None:
         all_data = {name: dict(value) for name, value in public_attrs.items()}
         return dashboard_utils.rest_response(
             success=True,
             message="Fetch all data from datacenter success.",
             **all_data)

     # Only names that the full dump would expose may be requested singly.
     if key not in public_attrs:
         return dashboard_utils.rest_response(
             success=False,
             message=f"Unknown key {key} in datacenter.")

     data = dict(public_attrs[key])
     return dashboard_utils.rest_response(
         success=True,
         message=f"Fetch {key} from datacenter success.",
         **{key: data})
Exemple #11
0
 async def get_actor_groups(self, req) -> aiohttp.web.Response:
     """Return actor groups built from actors plus actor creation tasks."""
     merged_actors = await DataOrganizer.get_all_actors()
     # Actor creation tasks share part of the actor interface and are shown
     # alongside actors in the logical view, so fold them into the same
     # mapping before the groups are constructed.
     merged_actors.update(await DataOrganizer.get_actor_creation_tasks())
     return rest_response(
         success=True,
         message="Fetched actor groups.",
         actor_groups=actor_utils.construct_actor_groups(merged_actors),
     )
Exemple #12
0
    async def get_ray_config(self, req) -> aiohttp.web.Response:
        """Return selected fields of ``~/ray_bootstrap_config.yaml``.

        The file is parsed on the first successful call and the extracted
        payload is cached in ``self._ray_config``; later calls reuse it.

        A failure response is returned when the file does not exist or
        when it cannot be parsed as YAML. ``head_type`` and
        ``worker_type`` fall back to ``"unknown"`` when the corresponding
        keys are absent. The other fields are required and a missing one
        raises ``KeyError``.
        """
        if self._ray_config is None:
            config_path = os.path.expanduser("~/ray_bootstrap_config.yaml")
            try:
                with open(config_path) as f:
                    cfg = yaml.safe_load(f)
            except FileNotFoundError:
                # Fix: this message used to be attached to the YAML error.
                return dashboard_utils.rest_response(
                    success=False,
                    message=f"No config found at {config_path}.",
                )
            except yaml.YAMLError:
                # Fix: this message used to be attached to the missing file.
                return dashboard_utils.rest_response(
                    success=False,
                    message="Invalid config, could not load YAML.")

            required_keys = (
                "min_workers",
                "max_workers",
                "initial_workers",
                "autoscaling_mode",
                "idle_timeout_minutes",
            )
            payload = {key: cfg[key] for key in required_keys}

            try:
                payload["head_type"] = cfg["head_node"]["InstanceType"]
            except KeyError:
                payload["head_type"] = "unknown"

            try:
                payload["worker_type"] = cfg["worker_nodes"]["InstanceType"]
            except KeyError:
                payload["worker_type"] = "unknown"

            # Cache only after the payload is complete.
            self._ray_config = payload

        return dashboard_utils.rest_response(
            success=True,
            message="Fetched ray config.",
            **self._ray_config,
        )
Exemple #13
0
 async def launch_profiling(self, req) -> aiohttp.web.Response:
     """Request profiling stats for a process from the stub of a node.

     Required query values are ``ip`` (selects the stub in
     ``self._stubs``), ``pid`` and ``duration`` (both parsed as ints).
     The reply's ``profiling_stats`` is JSON-decoded when non-empty;
     otherwise the reply's ``std_out`` is returned instead.
     """
     query = req.query
     ip = query["ip"]
     pid = int(query["pid"])
     duration = int(query["duration"])

     reply = await self._stubs[ip].GetProfilingStats(
         reporter_pb2.GetProfilingStatsRequest(pid=pid, duration=duration))

     if reply.profiling_stats:
         profiling_info = json.loads(reply.profiling_stats)
     else:
         # No stats were produced; pass along std_out from the reply.
         profiling_info = reply.std_out
     return dashboard_utils.rest_response(
         success=True,
         message="Profiling success.",
         profiling_info=profiling_info,
     )
    async def kill_actor(self, req) -> aiohttp.web.Response:
        """Send a KillActor request to the worker that hosts an actor.

        Requires the ``actorId``, ``ipAddress`` and ``port`` query values;
        if any is missing a "Bad Request" failure response is returned.
        An ``aiogrpc.AioRpcError`` from the call is ignored and the
        handler still reports success.
        """
        query = req.query
        try:
            actor_id, ip_address, port = (
                query[name] for name in ("actorId", "ipAddress", "port"))
        except KeyError:
            return rest_response(success=False, message="Bad Request")

        try:
            channel = aiogrpc.insecure_channel(f"{ip_address}:{port}")
            stub = core_worker_pb2_grpc.CoreWorkerServiceStub(channel)
            kill_request = core_worker_pb2.KillActorRequest(
                intended_actor_id=ray.utils.hex_to_binary(actor_id))
            await stub.KillActor(kill_request)
        except aiogrpc.AioRpcError:
            # This always throws an exception because the worker
            # is killed and the channel is closed on the worker side
            # before this handler, however it deletes the actor correctly.
            pass

        return rest_response(
            success=True, message=f"Killed actor with id {actor_id}")
Exemple #15
0
    async def get_memory_table(self, req) -> aiohttp.web.Response:
        """Return the memory table, optionally grouped and/or sorted.

        The optional ``group_by`` and ``sort_by`` query values are
        converted with ``GroupByType`` and ``SortingType`` and forwarded
        to ``DataOrganizer.get_memory_table``; absent or empty values are
        simply not passed.
        """
        query = req.query
        raw_group_by = query.get("group_by")
        raw_sort_by = query.get("sort_by")

        table_options = {}
        if raw_group_by:
            table_options["group_by"] = GroupByType(raw_group_by)
        if raw_sort_by:
            table_options["sort_by"] = SortingType(raw_sort_by)

        table = await DataOrganizer.get_memory_table(**table_options)
        return dashboard_utils.rest_response(
            success=True,
            message="Fetched memory table",
            memory_table=table.as_dict(),
        )
Exemple #16
0
 async def snapshot(self, req):
     """Return a snapshot of jobs, actors, deployments and version info.

     Job, actor, serve and session-name data are awaited in that order,
     then combined with ``ray.__version__`` and ``ray.__commit__`` under
     the ``snapshot`` field of the response.
     """
     snapshot = {}
     snapshot["jobs"] = await self.get_job_info()
     snapshot["actors"] = await self.get_actor_info()
     snapshot["deployments"] = await self.get_serve_info()
     snapshot["session_name"] = await self.get_session_name()
     snapshot["ray_version"] = ray.__version__
     snapshot["ray_commit"] = ray.__commit__
     return dashboard_utils.rest_response(
         success=True, message="hello", snapshot=snapshot)
Exemple #17
0
 async def test_aiohttp_cache_lru(self, req) -> aiohttp.web.Response:
     """Echo the ``value`` query value together with the current time."""
     echoed_value = req.query.get("value")
     now = time.time()
     return dashboard_utils.rest_response(
         success=True, message="OK", value=echoed_value, timestamp=now)
Exemple #18
0
 async def get_notified_agents(self, req) -> aiohttp.web.Response:
     """Return ``self._notified_agents`` expanded into response fields."""
     notified_agents = self._notified_agents
     return dashboard_utils.rest_response(
         success=True,
         message="Fetch notified agents success.",
         **notified_agents,
     )
Exemple #19
0
 async def enable_tensorboard(self, req) -> aiohttp.web.Response:
     """Run ``self._enable_tensorboard()`` and report whether it worked.

     Success is judged by ``self._tensor_board_dir`` being truthy after
     the call.
     """
     self._enable_tensorboard()
     if self._tensor_board_dir:
         return rest_response(success=True, message="Enabled tensorboard")
     return rest_response(
         success=False, message="Error enabling tensorboard")
Exemple #20
0
 async def tune_info(self, req) -> aiohttp.web.Response:
     """Return the result of ``self.get_stats()`` as the ``result`` field."""
     tune_stats = self.get_stats()
     return rest_response(
         success=True, message="Fetched tune info", result=tune_stats)
Exemple #21
0
def test_immutable_types():
    """Exercise ``dashboard_utils`` immutable containers end to end.

    Covers construction and equality, type conversion, JSON round-trips,
    copying, default values from ``get``, recursive wrapping, and the
    exceptions raised on invalid construction, mutation, and access to
    numpy array elements.
    """
    ImmutableDict = dashboard_utils.ImmutableDict
    ImmutableList = dashboard_utils.ImmutableList

    def numbered():
        # Fresh {"0": 0, ..., "999": 999} mapping on every call.
        return {str(i): i for i in range(1000)}

    source = numbered()
    source["list"] = [numbered()] + list(range(1, 1000))
    source["dict"] = numbered()

    frozen = dashboard_utils.make_immutable(source)
    assert type(frozen) is ImmutableDict
    assert frozen == ImmutableDict(source)
    assert frozen == source
    assert ImmutableDict(frozen) == frozen
    assert ImmutableList(frozen["list"]) == frozen["list"]
    assert "512" in source
    assert "512" in source["list"][0]
    assert "512" in source["dict"]

    # Test type conversion
    assert type(dict(frozen)["list"]) is ImmutableList
    assert type(list(frozen["list"])[0]) is ImmutableDict

    # Test json dumps / loads
    encoded = json.dumps(frozen, cls=dashboard_utils.CustomEncoder)
    decoded = json.loads(encoded)
    assert type(decoded) is dict
    assert type(decoded["list"]) is list
    assert frozen.mutable() == decoded
    dashboard_utils.rest_response(True, "OK", data=frozen)
    dashboard_utils.rest_response(True, "OK", **frozen)

    # Test copy
    shallow = copy.copy(frozen)
    assert shallow == frozen
    deep = copy.deepcopy(frozen)
    assert deep == frozen

    # Test get default immutable
    fallback = frozen.get("not exist list", [1, 2])
    assert type(fallback) is ImmutableList

    # Test recursive immutable
    assert type(frozen["list"]) is ImmutableList
    assert type(frozen["dict"]) is ImmutableDict
    assert type(frozen["list"][0]) is ImmutableDict

    # Test exception: constructors reject the wrong container type.
    with pytest.raises(TypeError):
        ImmutableList((1, 2))

    with pytest.raises(TypeError):
        ImmutableDict([1, 2])

    # Test exception: item assignment and mutating methods are unavailable.
    with pytest.raises(TypeError):
        frozen["list"] = []

    with pytest.raises(AttributeError):
        frozen.update({1: 3})

    with pytest.raises(TypeError):
        frozen["list"][0] = 0

    with pytest.raises(AttributeError):
        frozen["list"].extend([1, 2])

    with pytest.raises(AttributeError):
        frozen["list"].insert(1, 2)

    # Test exception: numpy array elements raise TypeError on access.
    array_in_dict = ImmutableDict({1: np.zeros([3, 5])})
    with pytest.raises(TypeError):
        print(array_in_dict[1])

    array_in_list = ImmutableList([1, np.zeros([3, 5])])
    with pytest.raises(TypeError):
        print(array_in_list[1])
Exemple #22
0
 async def get_all_actors(self, req) -> aiohttp.web.Response:
     """Return ``DataSource.actors`` as the ``actors`` response field."""
     all_actors = DataSource.actors
     return dashboard_utils.rest_response(
         success=True, message="All actors fetched.", actors=all_actors)
Exemple #23
0
 async def get_node(self, req) -> aiohttp.web.Response:
     """Return the details of one node.

     The node id is taken from ``req.match_info`` and resolved through
     ``DataOrganizer.get_node_info``.
     """
     requested_node_id = req.match_info.get("node_id")
     detail = await DataOrganizer.get_node_info(requested_node_id)
     return dashboard_utils.rest_response(
         success=True, message="Node details fetched.", detail=detail)