def get_log(
    api_server_url: str = None,
    node_id: Optional[str] = None,
    node_ip: Optional[str] = None,
    filename: Optional[str] = None,
    actor_id: Optional[str] = None,
    task_id: Optional[str] = None,
    pid: Optional[int] = None,
    follow: bool = False,
    tail: int = DEFAULT_LOG_LIMIT,
    timeout: int = DEFAULT_RPC_TIMEOUT,
    _interval: Optional[float] = None,
) -> Generator[str, None, None]:
    """Yield decoded log text fetched from the API server's log endpoint.

    The selectors (`node_id`, `node_ip`, `filename`, `actor_id`, `task_id`,
    `pid`) are packed into a `GetLogOptions` and sent as query parameters.
    Only options whose value is truthy are included in the query string.

    Args:
        api_server_url: Base URL of the API server. When None, Ray must be
            initialized and the URL is built from the current node's
            `webui_url`.
        node_id: Forwarded as the `node_id` option.
        node_ip: Forwarded as the `node_ip` option.
        filename: Forwarded as the `filename` option.
        actor_id: Forwarded as the `actor_id` option.
        task_id: Forwarded as the `task_id` option.
        pid: Forwarded as the `pid` option.
        follow: When True the "stream" endpoint is queried, otherwise "file".
        tail: Forwarded as the `lines` option.
        timeout: Forwarded as the `timeout` option. It is only sent as a query
            parameter; no client-side timeout is given to `requests.get`.
        _interval: Forwarded as the `interval` option.

    Yields:
        Each response chunk decoded as UTF-8, without its leading status byte.

    Raises:
        RayStateApiException: If the HTTP status is not 200, or a chunk does
            not start with the success marker `b"1"`.
        AssertionError: If `api_server_url` is None and Ray is not
            initialized, or a non-success chunk does not start with `b"0"`.
    """
    if api_server_url is None:
        assert ray.is_initialized()
        webui_url = ray._private.worker.global_worker.node.address_info["webui_url"]
        api_server_url = f"http://{webui_url}"

    media_type = "stream" if follow else "file"
    options = GetLogOptions(
        node_id=node_id,
        node_ip=node_ip,
        filename=filename,
        actor_id=actor_id,
        task_id=task_id,
        pid=pid,
        lines=tail,
        interval=_interval,
        media_type=media_type,
        timeout=timeout,
    )

    # Falsy values (None, 0, False, "") are left out of the query string.
    named_values = ((f.name, getattr(options, f.name)) for f in fields(options))
    options_dict = {name: value for name, value in named_values if value}

    query = urllib.parse.urlencode(options_dict)
    url = f"{api_server_url}/api/v0/logs/{media_type}?{query}"

    with requests.get(url, stream=True) as r:
        if r.status_code != 200:
            raise RayStateApiException(r.text)

        for raw_chunk in r.iter_content(chunk_size=None):
            chunk = bytearray(raw_chunk)
            if not chunk.startswith(b"1"):
                # Anything that is not a success chunk must carry the "0" marker;
                # the whole chunk (marker included) becomes the error message.
                assert chunk.startswith(b"0")
                raise RayStateApiException(chunk.decode("utf-8"))
            # First byte 1 means success; drop the marker before decoding.
            del chunk[0]
            yield chunk.decode("utf-8")
def _make_http_get_request(
    self,
    endpoint: str,
    params: Dict,
    timeout: float,
    _explain: bool = False,
):
    """Send a GET request to `endpoint` and return the unwrapped result.

    The request is issued via `self._do_request` inside the
    `warnings_on_slow_request` context manager (defined elsewhere;
    presumably it reports slow calls - confirm against its definition).

    Args:
        endpoint: Endpoint path, appended to `self._address` in error messages.
        params: Query parameters handed to `self._do_request`.
        timeout: Timeout handed to `self._do_request` and to
            `warnings_on_slow_request`.
        _explain: Passed to `warnings_on_slow_request` as `explain`.

    Returns:
        `response["data"]["result"]` of the decoded JSON body.

    Raises:
        ServerUnavailable: If the request fails with
            `requests.exceptions.ConnectionError`.
        RayStateApiException: If the request or its status check fails for
            any other reason, or the body's `result` field is False.
    """
    with warnings_on_slow_request(
        address=self._address, endpoint=endpoint, timeout=timeout, explain=_explain
    ):
        # Pre-bind so the handler can tell whether a response ever arrived.
        response = None
        try:
            response = self._do_request(
                "GET", endpoint, timeout=timeout, params=params
            )
            response.raise_for_status()
        except Exception as e:
            err_str = f"Failed to make request to {self._address}{endpoint}. "

            # Best-effort hint for the most common failure: the server is not
            # reachable at all.
            if isinstance(e, requests.exceptions.ConnectionError):
                connection_hint = (
                    "Failed to connect to API server. Please check the API server "
                    "log for details. Make sure dependencies are installed with "
                    "`pip install ray[default]`."
                )
                raise ServerUnavailable(err_str + connection_hint)

            if response is not None:
                err_str += (
                    f"Response(url={response.url},status={response.status_code})"
                )
            raise RayStateApiException(err_str) from e

    # Process the response.
    body = response.json()
    if body["result"] is False:
        raise RayStateApiException(
            "API server internal error. See dashboard.log file for more details. "
            f"Error: {body['msg']}"
        )

    # Dictionary of `ListApiResponse`
    return body["data"]["result"]
def list_logs(
    api_server_url: str = None,
    node_id: str = None,
    node_ip: str = None,
    glob_filter: str = None,
    timeout: int = DEFAULT_RPC_TIMEOUT,
) -> Dict[str, List[str]]:
    """Query the API server's `/api/v0/logs` endpoint and return its result.

    Args:
        api_server_url: Base URL of the API server. When None, Ray must be
            initialized and the URL is built from the current node's
            `webui_url`.
        node_id: Sent as the `node_id` query parameter when truthy.
        node_ip: Sent as the `node_ip` query parameter when truthy.
        glob_filter: Sent as the `glob` query parameter; defaults to "*" when
            empty or None.
        timeout: Sent as the `timeout` query parameter and also used as the
            client-side timeout (in seconds) of the HTTP request.

    Returns:
        `response["data"]["result"]` of the decoded JSON body.

    Raises:
        RayStateApiException: If the body's `result` field is False.
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by `raise_for_status`).
        requests.exceptions.Timeout: If the server does not answer within
            `timeout` seconds.
        AssertionError: If `api_server_url` is None and Ray is not initialized.
    """
    if api_server_url is None:
        assert ray.is_initialized()
        api_server_url = (
            f"http://{ray._private.worker.global_worker.node.address_info['webui_url']}"
        )

    options_dict = {}
    if node_ip:
        options_dict["node_ip"] = node_ip
    if node_id:
        options_dict["node_id"] = node_id
    # The glob is always sent; an empty/None filter means "match everything".
    options_dict["glob"] = glob_filter or "*"
    options_dict["timeout"] = timeout

    # Without a client-side timeout `requests` waits forever on a stalled
    # server, so the caller's `timeout` would never actually be enforced.
    r = requests.get(
        f"{api_server_url}/api/v0/logs?{urllib.parse.urlencode(options_dict)}",
        timeout=timeout,
    )
    r.raise_for_status()

    response = r.json()
    if response["result"] is False:
        raise RayStateApiException(
            "API server internal error. See dashboard.log file for more details. "
            f"Error: {response['msg']}"
        )
    return response["data"]["result"]
def _request(self, endpoint: str, timeout: float, params: Dict):
    """Send a GET request to `endpoint` and return the decoded JSON body.

    Args:
        endpoint: Endpoint path handed to `self._do_request`.
        timeout: Timeout handed to `self._do_request`.
        params: Query parameters handed to `self._do_request`.

    Returns:
        The value of `response.json()`.

    Raises:
        ServerUnavailable: If the request fails with
            `requests.exceptions.ConnectionError`.
        RayStateApiException: If the request or its status check fails for
            any other reason.
    """
    # Bind `response` before the try block: if `_do_request` itself raises,
    # the handler below still inspects `response`, and an unbound local there
    # would replace the real error with an UnboundLocalError.
    response = None
    try:
        response = self._do_request(
            "GET",
            endpoint,
            timeout=timeout,
            params=params,
        )
        response.raise_for_status()
    except Exception as e:
        err_str = f"Failed to make request to {endpoint}. "

        # Best-effort to give hints to users on potential reasons of connection
        # failure.
        if isinstance(e, requests.exceptions.ConnectionError):
            err_str += (
                "Failed to connect to API server. Please check the API server "
                "log for details. Make sure dependencies are installed with "
                "`pip install ray[default]`."
            )
            raise ServerUnavailable(err_str)

        # Only a response that actually arrived can contribute URL/status.
        if response is not None:
            err_str += f"Response(url={response.url},status={response.status_code})"
        raise RayStateApiException(err_str) from e

    return response.json()
def _list(
    resource_name: str,
    options: ListApiOptions,
    api_server_url: str = None,
    _explain: bool = False,
):
    """Query the API server to list the states of `resource_name`.

    Args:
        resource_name: The name of the resource. E.g., actor, task.
        options: The options for the REST API. `limit`, `timeout` and the
            `(key, value)` pairs in `filters` are sent as query parameters.
        api_server_url: The address of the API server. If it is not given,
            Ray is assumed to be connected already and the address is taken
            from the current node's `webui_url`.
        _explain: If True, a `partial_failure_warning` returned by the server
            is emitted as a `RuntimeWarning`.

    Returns:
        `response["data"]["result"]` of the decoded JSON body.

    Raises:
        RayStateApiException: If the body's `result` field is False.
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by `raise_for_status`).
        AssertionError: If `api_server_url` is None and Ray is not initialized.
    """
    if api_server_url is None:
        assert ray.is_initialized()
        api_server_url = (
            f"http://{ray.worker.global_worker.node.address_info['webui_url']}"
        )

    # We don't use `asdict` to avoid deepcopy.
    # https://docs.python.org/3/library/dataclasses.html#dataclasses.asdict
    params = {
        "limit": options.limit,
        "timeout": options.timeout,
        "filter_keys": [],
        "filter_values": [],
    }
    # Keys and values travel as two parallel lists, one entry per filter.
    for filter_k, filter_val in options.filters:
        params["filter_keys"].append(filter_k)
        params["filter_values"].append(filter_val)

    r = requests.request(
        "GET",
        f"{api_server_url}/api/v0/{resource_name}",
        params=params,
        headers={"Content-Type": "application/json"},
        json=None,
        timeout=options.timeout,
    )
    r.raise_for_status()

    response = r.json()
    if response["result"] is False:
        raise RayStateApiException(
            "API server internal error. See dashboard.log file for more details. "
            f"Error: {response['msg']}"
        )

    if _explain:
        # Print warnings if anything was given.
        warning_msg = response["data"].get("partial_failure_warning", None)
        if warning_msg:
            warnings.warn(warning_msg, RuntimeWarning)

    # Reuse the body parsed above instead of decoding the JSON a second time.
    return response["data"]["result"]
def list(
    self, resource: StateResource, options: ListApiOptions, _explain: bool = False
) -> Union[Dict, List]:
    """List resource states.

    Args:
        resource: The resource to list; its `value` names the endpoint
            (`/api/v0/<value>`). See `StateResource` for details.
        options: List options. `limit`, `timeout` and the `(key, value)`
            pairs in `filters` are sent as query parameters. See
            `ListApiOptions` for details.
        _explain: If True, a `partial_failure_warning` returned by the server
            is emitted as a `RuntimeWarning`.

    Returns:
        `response["data"]["result"]` of the body returned by `self._request`.

    Raises:
        RayStateApiException: If the body's `result` field is False.
        Exceptions raised by `self._request` are not caught here and
        propagate to the caller.
    """
    filter_keys = []
    filter_values = []
    # We don't use `asdict` to avoid deepcopy.
    # https://docs.python.org/3/library/dataclasses.html#dataclasses.asdict
    params = {
        "limit": options.limit,
        "timeout": options.timeout,
        "filter_keys": filter_keys,
        "filter_values": filter_values,
    }
    # Keys and values travel as two parallel lists, one entry per filter.
    for key, value in options.filters:
        filter_keys.append(key)
        filter_values.append(value)

    response = self._request(f"/api/v0/{resource.value}", options.timeout, params)
    if response["result"] is False:
        raise RayStateApiException(
            "API server internal error. See dashboard.log file for more details. "
            f"Error: {response['msg']}"
        )

    payload = response["data"]
    # Surface partial-failure warnings only when the caller asked for them.
    warning_msgs = payload.get("partial_failure_warning", None)
    if _explain and warning_msgs:
        warnings.warn(warning_msgs, RuntimeWarning)

    return payload["result"]
def _list(
    resource_name: str,
    options: ListApiOptions,
    api_server_url: str = None,
    _explain: bool = False,
):
    """Query the API server to list the states of `resource_name`.

    Args:
        resource_name: The name of the resource. E.g., actor, task.
        options: The options for the REST API. Every dataclass field of
            `options` is sent as a query parameter named after the field,
            with `str(value)` as its value.
        api_server_url: The address of the API server. If it is not given,
            Ray is assumed to be connected already and the address is taken
            from the current node's `webui_url`.
        _explain: If True, a `partial_failure_warning` returned by the server
            is emitted as a `RuntimeWarning`.

    Returns:
        `response["data"]["result"]` of the decoded JSON body.

    Raises:
        RayStateApiException: If the body's `result` field is False.
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by `raise_for_status`).
        AssertionError: If `api_server_url` is None and Ray is not initialized.
    """
    if api_server_url is None:
        assert ray.is_initialized()
        api_server_url = (
            f"http://{ray.worker.global_worker.node.address_info['webui_url']}"
        )

    # Let `requests` build the query string so every value is URL-encoded;
    # hand-joined "name=value" pairs break as soon as a value contains
    # `&`, `#`, `=` or `+`. Values are stringified first so the server still
    # receives the same text as before (e.g. "None" for unset fields) rather
    # than having None dropped or list values expanded by `requests`.
    params = {
        field.name: str(getattr(options, field.name)) for field in fields(options)
    }

    r = requests.request(
        "GET",
        f"{api_server_url}/api/v0/{resource_name}",
        params=params,
        headers={"Content-Type": "application/json"},
        json=None,
        timeout=options.timeout,
    )
    r.raise_for_status()

    response = r.json()
    if response["result"] is False:
        raise RayStateApiException(
            "API server internal error. See dashboard.log file for more details. "
            f"Error: {response['msg']}"
        )

    if _explain:
        # Print warnings if anything was given.
        warning_msg = response["data"].get("partial_failure_warning", None)
        if warning_msg is not None:
            warnings.warn(warning_msg, RuntimeWarning)

    # Reuse the body parsed above instead of decoding the JSON a second time.
    return response["data"]["result"]
def summary(
    self,
    resource: SummaryResource,
    *,
    options: SummaryApiOptions,
    _explain: bool = False,
) -> Dict:
    """Summarize resource states.

    Args:
        resource: The resource to summarize; its `value` names the endpoint
            (`/api/v0/<value>/summarize`). See `SummaryResource` for details.
        options: Summary options; `timeout` is sent as a query parameter and
            handed to `self._request`. See `SummaryApiOptions` for details.
        _explain: If True, a `partial_failure_warning` returned by the server
            is emitted as a `RuntimeWarning`.

    Returns:
        `response["data"]["result"]["node_id_to_summary"]` of the body
        returned by `self._request`.

    Raises:
        RayStateApiException: If the body's `result` field is False.
        Exceptions raised by `self._request` are not caught here and
        propagate to the caller.
    """
    params = {"timeout": options.timeout}
    endpoint = f"/api/v0/{resource.value}/summarize"
    response = self._request(endpoint, options.timeout, params)

    if response["result"] is False:
        error_text = (
            "API server internal error. See dashboard.log file for more details. "
            f"Error: {response['msg']}"
        )
        raise RayStateApiException(error_text)

    if _explain:
        # Surface partial-failure warnings only when the caller asked for them.
        warning_msg = response["data"].get("partial_failure_warning")
        if warning_msg:
            warnings.warn(warning_msg, RuntimeWarning)

    return response["data"]["result"]["node_id_to_summary"]