def __str__(self):
    """Render the error as the base message plus an owner-exited explanation.

    The explanation points at a log location. By default that is the
    generic session log directory; when ``self.owner_address`` is set and
    can be parsed, it is narrowed to a glob containing the owner's worker
    id (hex) together with the owner's IP address.
    """
    where_to_look = "`/tmp/ray/session_latest/logs`"

    if self.owner_address:
        try:
            owner = Address()
            owner.ParseFromString(self.owner_address)
            owner_ip = owner.ip_address
            owner_worker = WorkerID(owner.worker_id)
            where_to_look = (
                f"`/tmp/ray/session_latest/logs/*{owner_worker.hex()}*`"
                f" at IP address {owner_ip}"
            )
        except Exception:
            # Deliberately best-effort: any failure while decoding the
            # owner address must not prevent the default message from
            # being produced.
            pass

    headline = self._base_str()
    explanation = (
        "The object's owner has exited. This is the Python "
        "worker that first created the ObjectRef via `.remote()` or "
        "`ray.put()`. "
        f"Check cluster logs ({where_to_look}) for more "
        "information about the Python worker failure."
    )
    return headline + "\n\n" + explanation
async def test_logs_manager_resolve_file(logs_manager):
    """Check ``logs_manager.resolve_filename`` for each kind of lookup key.

    Scenarios covered, in order: an explicit log filename, an actor id
    (unknown actor, actor without a worker, actor with a worker), a task
    id, a pid (no matching file, matching file), and no key at all.

    Args:
        logs_manager: Object under test (presumably supplied by a pytest
            fixture defined elsewhere). Its ``data_source_client`` and
            ``list_logs`` attributes are replaced with mocks here.
    """
    node_id = NodeID(b"1" * 28)

    # ------------------------------------------------------------------
    # Test filename is given.
    # ------------------------------------------------------------------
    logs_client = logs_manager.data_source_client
    logs_client.get_all_registered_agent_ids = MagicMock()
    logs_client.get_all_registered_agent_ids.return_value = [node_id.hex()]
    expected_filename = "filename"
    log_file_name, n = await logs_manager.resolve_filename(
        node_id=node_id,
        log_filename=expected_filename,
        actor_id=None,
        task_id=None,
        pid=None,
        get_actor_fn=lambda _: True,
        timeout=10,
    )
    assert log_file_name == expected_filename
    assert n == node_id

    # ------------------------------------------------------------------
    # Test actor id is given.
    # ------------------------------------------------------------------
    actor_id = ActorID(b"2" * 16)

    # Actor doesn't exist.
    def get_actor_fn(requested_id):
        if requested_id == actor_id:
            return None
        assert False, "Not reachable."

    # Only the call under test lives inside ``pytest.raises`` so that an
    # error raised by the setup above cannot satisfy the expectation.
    with pytest.raises(ValueError):
        await logs_manager.resolve_filename(
            node_id=node_id,
            log_filename=None,
            actor_id=actor_id,
            task_id=None,
            pid=None,
            get_actor_fn=get_actor_fn,
            timeout=10,
        )

    # Actor exists, but it is not scheduled yet.
    with pytest.raises(ValueError):
        await logs_manager.resolve_filename(
            node_id=node_id,
            log_filename=None,
            actor_id=actor_id,
            task_id=None,
            pid=None,
            get_actor_fn=lambda _: generate_actor_data(actor_id, node_id, None),
            timeout=10,
        )

    # Actor exists.
    worker_id = WorkerID(b"3" * 28)
    logs_manager.list_logs = AsyncMock()
    logs_manager.list_logs.return_value = {
        "worker_out": [f"worker-{worker_id.hex()}-123-123.out"]
    }
    log_file_name, n = await logs_manager.resolve_filename(
        node_id=node_id.hex(),
        log_filename=None,
        actor_id=actor_id,
        task_id=None,
        pid=None,
        get_actor_fn=lambda _: generate_actor_data(actor_id, node_id, worker_id),
        timeout=10,
    )
    logs_manager.list_logs.assert_awaited_with(
        node_id.hex(), 10, glob_filter=f"*{worker_id.hex()}*"
    )
    assert log_file_name == f"worker-{worker_id.hex()}-123-123.out"
    assert n == node_id.hex()

    # ------------------------------------------------------------------
    # Test task id is given.
    # ------------------------------------------------------------------
    task_id = TaskID(b"2" * 24)
    with pytest.raises(NotImplementedError):
        await logs_manager.resolve_filename(
            node_id=node_id.hex(),
            log_filename=None,
            actor_id=None,
            task_id=task_id,
            pid=None,
            get_actor_fn=lambda _: generate_actor_data(actor_id, node_id, worker_id),
            timeout=10,
        )

    # ------------------------------------------------------------------
    # Test pid is given.
    # ------------------------------------------------------------------
    # Pid doesn't exist.
    pid = 456
    logs_manager.list_logs = AsyncMock()
    # Provide the wrong pid: the only listed file does not contain 456.
    logs_manager.list_logs.return_value = {"worker_out": ["worker-123-123-123.out"]}
    with pytest.raises(FileNotFoundError):
        await logs_manager.resolve_filename(
            node_id=node_id.hex(),
            log_filename=None,
            actor_id=None,
            task_id=None,
            pid=pid,
            get_actor_fn=lambda _: generate_actor_data(actor_id, node_id, worker_id),
            timeout=10,
        )

    # Pid exists.
    pid = 123
    logs_manager.list_logs = AsyncMock()
    # Provide a file whose name ends with the requested pid.
    logs_manager.list_logs.return_value = {"worker_out": [f"worker-123-123-{pid}.out"]}
    log_file_name, n = await logs_manager.resolve_filename(
        node_id=node_id.hex(),
        log_filename=None,
        actor_id=None,
        task_id=None,
        pid=pid,
        get_actor_fn=lambda _: generate_actor_data(actor_id, node_id, worker_id),
        timeout=10,
    )
    logs_manager.list_logs.assert_awaited_with(
        node_id.hex(), 10, glob_filter=f"*{pid}*"
    )
    assert log_file_name == f"worker-123-123-{pid}.out"

    # ------------------------------------------------------------------
    # Test nothing is given.
    # ------------------------------------------------------------------
    with pytest.raises(FileNotFoundError):
        await logs_manager.resolve_filename(
            node_id=node_id.hex(),
            log_filename=None,
            actor_id=None,
            task_id=None,
            pid=None,
            get_actor_fn=lambda _: generate_actor_data(actor_id, node_id, worker_id),
            timeout=10,
        )