Ejemplo n.º 1
0
def debug(address):
    """Show all active breakpoints and exceptions in the Ray debugger.

    Connects to the Ray instance at ``address`` and then loops forever:
    every key listed under the ``"RAY_PDB_"`` prefix of the internal KV
    store is loaded as JSON and printed, and the user is prompted for an
    index. An empty answer refreshes the listing; a valid index opens an
    interactive telnet session to that entry's ``pdb_address``.

    An answer that is not an integer, or that does not index into the
    listed sessions, is reported and the listing is refreshed rather than
    terminating the command with ``ValueError`` / ``IndexError``.

    Args:
        address: Address of the Ray instance. When falsy, the value returned
            by ``services.get_ray_address_to_use_or_die()`` is used instead.
    """
    # NOTE(review): telnetlib is deprecated since Python 3.11 and removed in
    # Python 3.13 (PEP 594); this import fails on those interpreters.
    from telnetlib import Telnet
    if not address:
        address = services.get_ray_address_to_use_or_die()
    logger.info(f"Connecting to Ray instance at {address}.")
    ray.init(address=address)
    kv = ray.experimental.internal_kv
    while True:
        active_sessions = kv._internal_kv_list("RAY_PDB_")
        print("Active breakpoints:")
        for i, active_session in enumerate(active_sessions):
            data = json.loads(kv._internal_kv_get(active_session))
            print(f"{i}: {data['proctitle']} | {data['filename']}:"
                  f"{data['lineno']}")
            print(data["traceback"])
        inp = input("Enter breakpoint index or press enter to refresh: ")
        if inp == "":
            print()
            continue
        # A typo at the prompt must not kill the whole debugging session:
        # reject anything that is not a usable index and show the list again.
        try:
            session_key = active_sessions[int(inp)]
        except (ValueError, IndexError):
            print(f"Invalid breakpoint index: {inp!r}")
            print()
            continue
        session = json.loads(kv._internal_kv_get(session_key))
        # "pdb_address" is split on ":" into the telnet host and port.
        host, port = session["pdb_address"].split(":")
        with Telnet(host, int(port)) as tn:
            tn.interact()
Ejemplo n.º 2
0
def status(address):
    """Print cluster status, including autoscaling info.

    Args:
        address: Address of the Ray instance. When falsy, the value returned
            by ``services.get_ray_address_to_use_or_die()`` is used instead.
    """
    # Use the caller's address if given, otherwise ask the services module.
    address = address or services.get_ray_address_to_use_or_die()
    logger.info(f"Connecting to Ray instance at {address}.")
    ray.init(address=address)
    report = debug_status()
    print(report)
Ejemplo n.º 3
0
def memory(address, redis_password):
    """Print object references held in a Ray cluster.

    Args:
        address: Address of the Ray instance. When falsy, the value returned
            by ``services.get_ray_address_to_use_or_die()`` is used instead.
        redis_password: Passed to ``ray.init`` as ``_redis_password``.
    """
    # Use the caller's address if given, otherwise ask the services module.
    address = address or services.get_ray_address_to_use_or_die()
    logger.info(f"Connecting to Ray instance at {address}.")
    ray.init(address=address, _redis_password=redis_password)
    summary = ray.internal.internal_api.memory_summary()
    print(summary)
Ejemplo n.º 4
0
def global_gc(address):
    """Trigger Python garbage collection on all cluster workers.

    Args:
        address: Address of the Ray instance. When falsy, the value returned
            by ``services.get_ray_address_to_use_or_die()`` is used instead.
    """
    # Use the caller's address if given, otherwise ask the services module.
    address = address or services.get_ray_address_to_use_or_die()
    logger.info(f"Connecting to Ray instance at {address}.")
    ray.init(address=address)
    internal_api = ray.internal.internal_api
    internal_api.global_gc()
    print("Triggered gc.collect() on all workers.")
Ejemplo n.º 5
0
def timeline(address):
    """Take a Chrome tracing timeline for a Ray cluster.

    The trace is written by ``ray.timeline`` to a file named
    ``ray-timeline-<timestamp>.json`` inside the directory returned by
    ``ray.utils.get_user_temp_dir()``; the path and file size are logged.

    Args:
        address: Address of the Ray instance. When falsy, the value returned
            by ``services.get_ray_address_to_use_or_die()`` is used instead.
    """
    # Use the caller's address if given, otherwise ask the services module.
    address = address or services.get_ray_address_to_use_or_die()
    logger.info(f"Connecting to Ray instance at {address}.")
    ray.init(address=address)

    # Timestamp the file name so successive traces get distinct names.
    stamp = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
    temp_dir = ray.utils.get_user_temp_dir()
    filename = os.path.join(temp_dir, f"ray-timeline-{stamp}.json")

    ray.timeline(filename=filename)
    size = os.path.getsize(filename)
    logger.info(f"Trace file written to {filename} ({size} bytes).")
    logger.info(
        "You can open this with chrome://tracing in the Chrome browser.")
Ejemplo n.º 6
0
def memory_summary(address=None,
                   redis_password=ray_constants.REDIS_DEFAULT_PASSWORD,
                   group_by="NODE_ADDRESS",
                   sort_by="OBJECT_SIZE",
                   line_wrap=True,
                   stats_only=False):
    """Build a memory summary from a freshly initialized ``GlobalState``.

    Args:
        address: Address of the Ray instance. When falsy, the value returned
            by ``services.get_ray_address_to_use_or_die()`` is used instead.
        redis_password: Passed to ``GlobalState._initialize_global_state``
            together with the address.
        group_by: Forwarded to the dashboard ``memory_summary`` helper.
        sort_by: Forwarded to the dashboard ``memory_summary`` helper.
        line_wrap: Forwarded to the dashboard ``memory_summary`` helper.
        stats_only: When truthy, skip the dashboard summary and return only
            the store statistics.

    Returns:
        ``get_store_stats(state)`` when ``stats_only`` is truthy; otherwise
        the dashboard summary with ``get_store_stats(state)`` appended
        using ``+``.
    """
    # Imported under an alias so the helper does not shadow this function's
    # own name inside the body.
    from ray.new_dashboard.memory_utils import (
        memory_summary as dashboard_summary)

    address = address or services.get_ray_address_to_use_or_die()
    state = GlobalState()
    state._initialize_global_state(address, redis_password)

    if not stats_only:
        table = dashboard_summary(state, group_by, sort_by, line_wrap)
        return table + get_store_stats(state)
    return get_store_stats(state)