def get_agent_applicable_actions(
    self,
    memory: D.T_memory[D.T_state],
    other_agents_actions: D.T_agent[D.T_concurrency[D.T_event]],
    agent: str,
) -> Space[D.T_event]:
    """Return the actions ``agent`` may apply given the other agents' actions.

    An agent whose state equals its goal may only stay. Otherwise staying
    is always allowed, and each move (up, down, left, right, in that order)
    is allowed when the target cell lies inside the grid, holds ``1`` in
    ``self._maze``, and is not a cell another agent's action leads it into.

    Args:
        memory: Per-agent states; each state exposes ``x`` and ``y``.
        other_agents_actions: Mapping from agent to the action it takes.
        agent: The agent whose applicable actions are requested.

    Returns:
        A ``ListSpace`` holding the applicable actions.
    """
    # Guard clause: an agent already on its goal cannot move any more.
    if memory[agent] == self._agents_goals[agent]:
        return ListSpace([AgentAction.stay])

    # (action, dx, dy) displacement of each action; "up" decreases y.
    displacements = (
        (AgentAction.left, -1, 0),
        (AgentAction.right, 1, 0),
        (AgentAction.up, 0, -1),
        (AgentAction.down, 0, 1),
        (AgentAction.stay, 0, 0),
    )

    # Cells the other agents will most probably occupy after their actions.
    claimed_cells = set()
    for other, chosen in other_agents_actions.items():
        for action, dx, dy in displacements:
            if chosen == action:
                other_pos = memory[other]
                claimed_cells.add((other_pos.x + dx, other_pos.y + dy))
                break
        # An action matching none of the known ones claims no cell.

    here = memory[agent]
    col, row = here.x, here.y

    # (action, stays inside the grid, target column, target row)
    candidates = (
        (AgentAction.up, row > 0, col, row - 1),
        (AgentAction.down, row < self._num_rows - 1, col, row + 1),
        (AgentAction.left, col > 0, col - 1, row),
        (AgentAction.right, col < self._num_cols - 1, col + 1, row),
    )

    allowed = [AgentAction.stay]
    for action, inside_grid, target_col, target_row in candidates:
        # The maze is indexed [row][col]; it is only read for in-grid targets.
        if (
            inside_grid
            and self._maze[target_row][target_col] == 1
            and (target_col, target_row) not in claimed_cells
        ):
            allowed.append(action)
    return ListSpace(allowed)
def _get_goals_(self) -> Space[D.T_observation]:
    """Return the goal space, which contains only the end position."""
    goal_positions = [self.end]
    return ListSpace(goal_positions)