예제 #1
0
def memory_summary(node_manager_address=None,
                   node_manager_port=None,
                   stats_only=False):
    """Return a formatted string describing memory usage in the cluster.

    Args:
        node_manager_address: Address of the raylet to query. Only used when
            ``node_manager_port`` is given as well; if either one is None,
            a raylet is picked from ``ray.nodes()`` instead.
        node_manager_port: Port of the raylet to query.
        stats_only: If True, return only the result of
            ``store_stats_summary(reply)``. Otherwise ``reply.memory_summary``
            is returned with the store stats summary appended.

    Returns:
        The summary string built from the ``FormatGlobalMemoryInfo`` reply.

    Raises:
        RuntimeError: If no raylet address was supplied and ``ray.nodes()``
            reports no alive node.
    """

    import grpc
    from ray.core.generated import node_manager_pb2
    from ray.core.generated import node_manager_pb2_grpc

    # We can ask any Raylet for the global memory info, that Raylet internally
    # asks all nodes in the cluster for memory stats.
    if node_manager_address is None or node_manager_port is None:
        # Skip dead nodes: the first entry of the node table is not
        # necessarily alive, and querying a dead raylet only ends in an RPC
        # failure. Entries without an "Alive" key are treated as alive.
        raylet = next(
            (node for node in ray.nodes() if node.get("Alive", True)), None)
        if raylet is None:
            raise RuntimeError(
                "Cannot collect the memory summary: no alive raylet found.")
        node_manager_address = raylet["NodeManagerAddress"]
        node_manager_port = raylet["NodeManagerPort"]
    raylet_address = "{}:{}".format(node_manager_address, node_manager_port)

    # Use the channel as a context manager so it is closed once the reply
    # has been received, instead of leaking the connection.
    with grpc.insecure_channel(
            raylet_address,
            options=[
                ("grpc.max_send_message_length", MAX_MESSAGE_LENGTH),
                ("grpc.max_receive_message_length", MAX_MESSAGE_LENGTH),
            ],
    ) as channel:
        stub = node_manager_pb2_grpc.NodeManagerServiceStub(channel)
        reply = stub.FormatGlobalMemoryInfo(
            node_manager_pb2.FormatGlobalMemoryInfoRequest(), timeout=30.0)

    if stats_only:
        return store_stats_summary(reply)
    return reply.memory_summary + "\n" + store_stats_summary(reply, stats_only)
예제 #2
0
def get_store_stats(state, node_manager_address=None, node_manager_port=None):
    """Return the object store stats summary for the cluster.

    The raylet is asked for ``FormatGlobalMemoryInfo`` with
    ``include_memory_info=False`` and the reply is passed to
    ``store_stats_summary``.

    Args:
        state: Object whose ``node_table()`` yields node entries with the
            keys "Alive", "NodeManagerAddress" and "NodeManagerPort". It is
            only consulted when no explicit raylet address is given.
        node_manager_address: Address of the raylet to query. Only used when
            ``node_manager_port`` is given as well.
        node_manager_port: Port of the raylet to query.

    Returns:
        Whatever ``store_stats_summary(reply)`` returns.

    Raises:
        AssertionError: If no raylet address was supplied and no node in
            ``state.node_table()`` is alive.
    """

    import grpc
    from ray.core.generated import node_manager_pb2
    from ray.core.generated import node_manager_pb2_grpc

    # We can ask any Raylet for the global memory info, that Raylet internally
    # asks all nodes in the cluster for memory stats.
    if node_manager_address is None or node_manager_port is None:
        # We should ask for a raylet that is alive.
        raylet = next(
            (node for node in state.node_table() if node["Alive"]), None)
        if raylet is None:
            # Raised explicitly (rather than via `assert`) so the check is
            # not stripped under `python -O`; the exception type and message
            # match the previous assertion.
            raise AssertionError("Every raylet is dead")
        node_manager_address = raylet["NodeManagerAddress"]
        node_manager_port = raylet["NodeManagerPort"]
    raylet_address = "{}:{}".format(node_manager_address, node_manager_port)

    # Use the channel as a context manager so it is closed once the reply
    # has been received, instead of leaking the connection.
    with grpc.insecure_channel(
            raylet_address,
            options=[
                ("grpc.max_send_message_length", MAX_MESSAGE_LENGTH),
                ("grpc.max_receive_message_length", MAX_MESSAGE_LENGTH),
            ],
    ) as channel:
        stub = node_manager_pb2_grpc.NodeManagerServiceStub(channel)
        reply = stub.FormatGlobalMemoryInfo(
            node_manager_pb2.FormatGlobalMemoryInfoRequest(
                include_memory_info=False),
            timeout=30.0)
    return store_stats_summary(reply)
예제 #3
0
def memory_summary(node_manager_address=None,
                   node_manager_port=None,
                   stats_only=False):
    """Return a formatted string describing memory usage in the cluster.

    Args:
        node_manager_address: Address of the raylet to query. Only used when
            ``node_manager_port`` is given as well; if either one is None,
            a raylet is picked from ``ray.nodes()`` instead.
        node_manager_port: Port of the raylet to query.
        stats_only: If True, return only the aggregate object store stats.
            Otherwise ``reply.memory_summary`` is returned with the object
            store stats appended.

    Returns:
        The summary string built from the ``FormatGlobalMemoryInfo`` reply.

    Raises:
        RuntimeError: If no raylet address was supplied and ``ray.nodes()``
            reports no alive node.
    """

    import grpc
    from ray.core.generated import node_manager_pb2
    from ray.core.generated import node_manager_pb2_grpc

    # We can ask any Raylet for the global memory info, that Raylet internally
    # asks all nodes in the cluster for memory stats.
    if node_manager_address is None or node_manager_port is None:
        # Skip dead nodes: the first entry of the node table is not
        # necessarily alive, and querying a dead raylet only ends in an RPC
        # failure. Entries without an "Alive" key are treated as alive.
        raylet = next(
            (node for node in ray.nodes() if node.get("Alive", True)), None)
        if raylet is None:
            raise RuntimeError(
                "Cannot collect the memory summary: no alive raylet found.")
        node_manager_address = raylet["NodeManagerAddress"]
        node_manager_port = raylet["NodeManagerPort"]
    raylet_address = "{}:{}".format(node_manager_address, node_manager_port)

    # Use the channel as a context manager so it is closed once the reply
    # has been received, instead of leaking the connection.
    with grpc.insecure_channel(
            raylet_address,
            options=[
                ("grpc.max_send_message_length", MAX_MESSAGE_LENGTH),
                ("grpc.max_receive_message_length", MAX_MESSAGE_LENGTH),
            ],
    ) as channel:
        stub = node_manager_pb2_grpc.NodeManagerServiceStub(channel)
        reply = stub.FormatGlobalMemoryInfo(
            node_manager_pb2.FormatGlobalMemoryInfoRequest(), timeout=30.0)

    stats = reply.store_stats
    mib = 1024 * 1024

    # A reply with object_store_bytes_avail == 0 used to crash here with
    # ZeroDivisionError; report 0.0% full in that case instead.
    if stats.object_store_bytes_avail > 0:
        percent_full = round(
            100 * stats.object_store_bytes_used /
            stats.object_store_bytes_avail, 2)
    else:
        percent_full = 0.0

    parts = [
        "--- Aggregate object store stats across all nodes ---\n",
        "Plasma memory usage {} MiB, {} objects, {}% full\n".format(
            int(stats.object_store_bytes_used / mib),
            stats.num_local_objects, percent_full),
    ]
    # Spill/restore lines are only emitted when time was recorded, which
    # also keeps the throughput divisions well-defined.
    if stats.spill_time_total_s > 0:
        parts.append(
            "Spilled {} MiB, {} objects, avg write throughput {} MiB/s\n".
            format(
                int(stats.spilled_bytes_total / mib),
                stats.spilled_objects_total,
                int(stats.spilled_bytes_total / mib /
                    stats.spill_time_total_s)))
    if stats.restore_time_total_s > 0:
        parts.append(
            "Restored {} MiB, {} objects, avg read throughput {} MiB/s\n".
            format(
                int(stats.restored_bytes_total / mib),
                stats.restored_objects_total,
                int(stats.restored_bytes_total / mib /
                    stats.restore_time_total_s)))
    if stats.consumed_bytes > 0:
        parts.append("Objects consumed by Ray tasks: {} MiB.".format(
            int(stats.consumed_bytes / mib)))
    store_summary = "".join(parts)

    if stats_only:
        return store_summary
    return reply.memory_summary + "\n" + store_summary
예제 #4
0
def get_store_stats(timeout=5):
    """Fetch the ``store_stats`` field of a ``FormatGlobalMemoryInfo`` reply.

    The request is sent through the stub returned by ``_get_raylet_stub()``
    with ``include_memory_info=False``.

    Args:
        timeout: Timeout forwarded to the RPC call (defaults to 5).

    Returns:
        The ``store_stats`` attribute of the reply.
    """
    from ray.core.generated import node_manager_pb2

    raylet_stub = _get_raylet_stub()
    request = node_manager_pb2.FormatGlobalMemoryInfoRequest(
        include_memory_info=False)
    response = raylet_stub.FormatGlobalMemoryInfo(request, timeout=timeout)
    return response.store_stats
예제 #5
0
def memory_summary():
    """Return a formatted string describing memory usage in the cluster.

    Returns:
        The ``memory_summary`` field of the ``FormatGlobalMemoryInfo`` reply
        obtained from one alive raylet.

    Raises:
        RuntimeError: If ``ray.nodes()`` reports no alive node.
    """

    import grpc
    from ray.core.generated import node_manager_pb2
    from ray.core.generated import node_manager_pb2_grpc

    # We can ask any Raylet for the global memory info. Query the node table
    # once so the address and port come from the same entry, and skip dead
    # nodes since a dead raylet cannot answer. Entries without an "Alive"
    # key are treated as alive.
    raylet = next(
        (node for node in ray.nodes() if node.get("Alive", True)), None)
    if raylet is None:
        raise RuntimeError(
            "Cannot collect the memory summary: no alive raylet found.")
    raylet_address = "{}:{}".format(raylet["NodeManagerAddress"],
                                    raylet["NodeManagerPort"])

    # Use the channel as a context manager so it is closed once the reply
    # has been received, instead of leaking the connection.
    with grpc.insecure_channel(raylet_address) as channel:
        stub = node_manager_pb2_grpc.NodeManagerServiceStub(channel)
        reply = stub.FormatGlobalMemoryInfo(
            node_manager_pb2.FormatGlobalMemoryInfoRequest(), timeout=30.0)
    return reply.memory_summary