예제 #1
0
 def get_agent_applicable_actions(
     self,
     memory: D.T_memory[D.T_state],
     other_agents_actions: D.T_agent[D.T_concurrency[D.T_event]],
     agent: str,
 ) -> Space[D.T_event]:
     """Return the space of actions `agent` may take from its current cell.

     An agent whose entry in `memory` equals its entry in
     `self._agents_goals` can only stay. Otherwise `stay` is always
     offered, and each of up/down/left/right is offered when the target
     cell lies inside the grid bounds, its `self._maze` value equals 1
     (presumably "walkable" -- confirm against the maze definition), and
     no other agent is expected to move into it.

     Args:
         memory: Mapping from agent name to a state exposing `x` and `y`.
         other_agents_actions: Mapping from the other agents' names to the
             action each one is expected to take next.
         agent: Name of the agent whose applicable actions are requested.

     Returns:
         A `ListSpace` of actions, ordered stay, up, down, left, right
         (restricted to the ones that apply).
     """
     # Guard clause: an agent already on its goal has nothing left to do.
     if memory[agent] == self._agents_goals[agent]:
         return ListSpace([AgentAction.stay])

     # Each known action paired with the cell it leads to from a state.
     # The pairs are probed in this order and the first match wins.
     # "up" decreases y and "down" increases it.
     projections = (
         (AgentAction.left, lambda state: (state.x - 1, state.y)),
         (AgentAction.right, lambda state: (state.x + 1, state.y)),
         (AgentAction.up, lambda state: (state.x, state.y - 1)),
         (AgentAction.down, lambda state: (state.x, state.y + 1)),
         (AgentAction.stay, lambda state: (state.x, state.y)),
     )

     # Cells the other agents are expected to occupy after their moves.
     # Actions matching none of the known ones reserve nothing.
     reserved = set()
     for peer, peer_action in other_agents_actions.items():
         for candidate, project in projections:
             if peer_action == candidate:
                 reserved.add(project(memory[peer]))
                 break

     here = memory[agent]
     x, y = here.x, here.y

     def can_enter(col, row):
         # The maze is indexed [row][col]; reserved cells are (x, y) pairs.
         return self._maze[row][col] == 1 and (col, row) not in reserved

     # Bounds are tested first so the maze is never indexed off-grid.
     moves = [AgentAction.stay]
     if y > 0 and can_enter(x, y - 1):
         moves.append(AgentAction.up)
     if y < self._num_rows - 1 and can_enter(x, y + 1):
         moves.append(AgentAction.down)
     if x > 0 and can_enter(x - 1, y):
         moves.append(AgentAction.left)
     if x < self._num_cols - 1 and can_enter(x + 1, y):
         moves.append(AgentAction.right)
     return ListSpace(moves)
예제 #2
0
 def _get_goals_(self) -> Space[D.T_observation]:
     """Return the goal space: a `ListSpace` whose only element is `self.end`."""
     goal_positions = [self.end]
     return ListSpace(goal_positions)