예제 #1
0
파일: state_cli.py 프로젝트: tchordia/ray
def nodes(ctx, format: str):
    url = ctx.obj["api_server_url"]
    format = AvailableFormat(format)
    print(
        get_state_api_output_to_print(
            list_nodes(api_server_url=url, _explain=_should_explain(format)),
            format=format,
        )
    )
예제 #2
0
def test_list_nodes(shutdown_only):
    ray.init()

    def verify():
        node_data = list(list_nodes().values())[0]
        correct_state = node_data["state"] == "ALIVE"
        is_id_hex = is_hex(node_data["node_id"])
        correct_id = ray.nodes()[0]["NodeID"] == node_data["node_id"]
        return correct_state and is_id_hex and correct_id

    wait_for_condition(verify)
    print(list_nodes())
예제 #3
0
def nodes(ctx, format: str, filter: List[Tuple[str, str]]):
    url = ctx.obj["api_server_url"]
    format = AvailableFormat(format)
    print(
        get_state_api_output_to_print(
            list_nodes(
                api_server_url=url,
                filters=filter,
                _explain=_should_explain(format),
            ),
            format=format,
        )
    )
예제 #4
0
    def verify():
        head_node = list_nodes()[0]
        # When glob filter is not provided, it should provide all logs
        logs = list_logs(node_id=head_node["node_id"])
        assert "raylet" in logs
        assert "gcs_server" in logs
        assert "dashboard" in logs
        assert "agent" in logs
        assert "internal" in logs
        assert "driver" in logs
        assert "autoscaler" in logs

        # Test glob works.
        logs = list_logs(node_id=head_node["node_id"], glob_filter="raylet*")
        assert len(logs) == 1
        return True
예제 #5
0
def test_log_get(ray_start_cluster):
    cluster = ray_start_cluster
    cluster.add_node(num_cpus=0)
    ray.init(address=cluster.address)
    head_node = list_nodes()[0]
    cluster.add_node(num_cpus=1)

    @ray.remote(num_cpus=1)
    class Actor:
        def print(self, i):
            for _ in range(i):
                print("1")

        def getpid(self):
            import os

            return os.getpid()

    """
    Test filename match
    """

    def verify():
        # By default, node id should be configured to the head node.
        for log in get_log(
            node_id=head_node["node_id"], filename="raylet.out", tail=10
        ):
            # + 1 since the last line is just empty.
            assert len(log.split("\n")) == 11
        return True

    wait_for_condition(verify)

    """
    Test worker pid / IP match
    """
    a = Actor.remote()
    pid = ray.get(a.getpid.remote())
    ray.get(a.print.remote(20))

    def verify():
        # By default, node id should be configured to the head node.
        for log in get_log(node_ip=head_node["node_ip"], pid=pid, tail=10):
            # + 1 since the last line is just empty.
            assert len(log.split("\n")) == 11
        return True

    wait_for_condition(verify)

    """
    Test actor logs.
    """
    actor_id = a._actor_id.hex()

    def verify():
        # By default, node id should be configured to the head node.
        for log in get_log(actor_id=actor_id, tail=10):
            # + 1 since the last line is just empty.
            assert len(log.split("\n")) == 11
        return True

    wait_for_condition(verify)

    with pytest.raises(NotImplementedError):
        for _ in get_log(task_id=123, tail=10):
            pass
예제 #6
0
def test_logs_stream_and_tail(ray_start_with_dashboard):
    assert wait_until_server_available(ray_start_with_dashboard["webui_url"]) is True
    webui_url = ray_start_with_dashboard["webui_url"]
    webui_url = format_web_url(webui_url)
    node_id = list_nodes()[0]["node_id"]

    def verify_basic():
        stream_response = requests.get(
            webui_url
            + f"/api/v0/logs/file?node_id={node_id}&filename=gcs_server.out&lines=5",
            stream=True,
        )
        if stream_response.status_code != 200:
            raise ValueError(stream_response.content.decode("utf-8"))
        lines = []
        for line in stream_response.iter_lines():
            lines.append(line.decode("utf-8"))
        return len(lines) == 5 or len(lines) == 6

    wait_for_condition(verify_basic)

    @ray.remote
    class Actor:
        def write_log(self, strings):
            for s in strings:
                print(s)

        def getpid(self):
            return os.getpid()

    test_log_text = "test_log_text_日志_{}"
    actor = Actor.remote()
    ray.get(actor.write_log.remote([test_log_text.format("XXXXXX")]))

    # Test stream and fetching by actor id
    stream_response = requests.get(
        webui_url
        + "/api/v0/logs/stream?&lines=2"
        + f"&actor_id={actor._ray_actor_id.hex()}",
        stream=True,
    )
    if stream_response.status_code != 200:
        raise ValueError(stream_response.content.decode("utf-8"))
    stream_iterator = stream_response.iter_content(chunk_size=None)
    # NOTE: Prefix 1 indicates the stream has succeeded.
    assert (
        next(stream_iterator).decode("utf-8")
        == "1:actor_name:Actor\n" + test_log_text.format("XXXXXX") + "\n"
    )

    streamed_string = ""
    for i in range(5):
        strings = []
        for j in range(100):
            strings.append(test_log_text.format(f"{100*i + j:06d}"))

        ray.get(actor.write_log.remote(strings))

        string = ""
        for s in strings:
            string += s + "\n"
        streamed_string += string
        # NOTE: Prefix 1 indicates the stream has succeeded.
        assert next(stream_iterator).decode("utf-8") == "1" + string
    del stream_response

    # Test tailing log by actor id
    LINES = 150
    file_response = requests.get(
        webui_url
        + f"/api/v0/logs/file?&lines={LINES}"
        + "&actor_id="
        + actor._ray_actor_id.hex(),
    ).content.decode("utf-8")
    # NOTE: Prefix 1 indicates the stream has succeeded.
    assert file_response == "1" + "\n".join(streamed_string.split("\n")[-(LINES + 1) :])

    # Test query by pid & node_ip instead of actor id.
    node_ip = list(ray.nodes())[0]["NodeManagerAddress"]
    pid = ray.get(actor.getpid.remote())
    file_response = requests.get(
        webui_url
        + f"/api/v0/logs/file?node_ip={node_ip}&lines={LINES}"
        + f"&pid={pid}",
    ).content.decode("utf-8")
    # NOTE: Prefix 1 indicates the stream has succeeded.
    assert file_response == "1" + "\n".join(streamed_string.split("\n")[-(LINES + 1) :])
예제 #7
0
def test_logs_list(ray_start_with_dashboard):
    assert wait_until_server_available(ray_start_with_dashboard["webui_url"]) is True
    webui_url = ray_start_with_dashboard["webui_url"]
    webui_url = format_web_url(webui_url)
    node_id = list_nodes()[0]["node_id"]

    def verify():
        response = requests.get(webui_url + f"/api/v0/logs?node_id={node_id}")
        response.raise_for_status()
        result = json.loads(response.text)
        assert result["result"]
        logs = result["data"]["result"]

        # Test worker logs
        outs = logs["worker_out"]
        errs = logs["worker_err"]
        core_worker_logs = logs["core_worker"]

        assert len(outs) == len(errs) == len(core_worker_logs)
        assert len(outs) > 0

        # Test gcs / raylet / dashboard
        for file in ["gcs_server.out", "gcs_server.err"]:
            assert file in logs["gcs_server"]
        for file in ["raylet.out", "raylet.err"]:
            assert file in logs["raylet"]
        for file in ["dashboard.log"]:
            assert file in logs["dashboard"]
        for file in ["dashboard_agent.log"]:
            assert file in logs["agent"]
        return True

    wait_for_condition(verify)

    def verify_filter():
        # Test that logs/list can be filtered
        response = requests.get(
            webui_url + f"/api/v0/logs?node_id={node_id}&glob=*gcs*"
        )
        response.raise_for_status()
        result = json.loads(response.text)
        assert result["result"]
        logs = result["data"]["result"]
        assert "gcs_server" in logs
        assert "internal" in logs
        assert len(logs) == 2
        assert "gcs_server.out" in logs["gcs_server"]
        assert "gcs_server.err" in logs["gcs_server"]
        assert "debug_state_gcs.txt" in logs["internal"]
        return True

    wait_for_condition(verify_filter)

    def verify_worker_logs():
        response = requests.get(
            webui_url + f"/api/v0/logs?node_id={node_id}&glob=*worker*"
        )
        response.raise_for_status()
        result = json.loads(response.text)
        assert result["result"]
        logs = result["data"]["result"]
        worker_log_categories = [
            "core_worker",
            "worker_out",
            "worker_err",
        ]
        assert all([cat in logs for cat in worker_log_categories])
        num_workers = len(
            list(filter(lambda w: w["worker_type"] == "WORKER", list_workers()))
        )
        assert (
            len(logs["worker_out"])
            == len(logs["worker_err"])
            == len(logs["worker_out"])
        )
        assert num_workers == len(logs["worker_out"])
        return True

    wait_for_condition(verify_worker_logs)
예제 #8
0
def nodes(ctx):
    url = ctx.obj["api_server_url"]
    pprint(list_nodes(url))
예제 #9
0
 def verify():
     node_data = list(list_nodes().values())[0]
     correct_state = node_data["state"] == "ALIVE"
     is_id_hex = is_hex(node_data["node_id"])
     correct_id = ray.nodes()[0]["NodeID"] == node_data["node_id"]
     return correct_state and is_id_hex and correct_id
예제 #10
0
파일: state_cli.py 프로젝트: smorad/ray
def nodes(ctx, format: str):
    url = ctx.obj["api_server_url"]
    print(
        get_state_api_output_to_print(list_nodes(api_server_url=url),
                                      format=AvailableFormat(format)))