def get_agent_applicable_actions(
    self,
    memory: D.T_memory[D.T_state],
    other_agents_actions: D.T_agent[D.T_concurrency[D.T_event]],
    agent: str,
) -> Space[D.T_event]:
    if memory[agent] == self._agents_goals[agent]:
        return ListSpace([AgentAction.stay])
    else:
        # predict the cells the other agents will most probably occupy next
        occupied_next_cells = set()
        for other_agent, other_agent_action in other_agents_actions.items():
            if other_agent_action == AgentAction.left:
                occupied_next_cells.add((memory[other_agent].x - 1, memory[other_agent].y))
            elif other_agent_action == AgentAction.right:
                occupied_next_cells.add((memory[other_agent].x + 1, memory[other_agent].y))
            elif other_agent_action == AgentAction.up:
                occupied_next_cells.add((memory[other_agent].x, memory[other_agent].y - 1))
            elif other_agent_action == AgentAction.down:
                occupied_next_cells.add((memory[other_agent].x, memory[other_agent].y + 1))
            elif other_agent_action == AgentAction.stay:
                occupied_next_cells.add((memory[other_agent].x, memory[other_agent].y))
        # now, compute this agent's applicable actions
        applicable_actions = [AgentAction.stay]
        if (
            memory[agent].y > 0
            and self._maze[memory[agent].y - 1][memory[agent].x] == 1
            and (memory[agent].x, memory[agent].y - 1) not in occupied_next_cells
        ):
            applicable_actions.append(AgentAction.up)
        if (
            memory[agent].y < self._num_rows - 1
            and self._maze[memory[agent].y + 1][memory[agent].x] == 1
            and (memory[agent].x, memory[agent].y + 1) not in occupied_next_cells
        ):
            applicable_actions.append(AgentAction.down)
        if (
            memory[agent].x > 0
            and self._maze[memory[agent].y][memory[agent].x - 1] == 1
            and (memory[agent].x - 1, memory[agent].y) not in occupied_next_cells
        ):
            applicable_actions.append(AgentAction.left)
        if (
            memory[agent].x < self._num_cols - 1
            and self._maze[memory[agent].y][memory[agent].x + 1] == 1
            and (memory[agent].x + 1, memory[agent].y) not in occupied_next_cells
        ):
            applicable_actions.append(AgentAction.right)
        return ListSpace(applicable_actions)
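# A minimal standalone sketch of the conflict rule used above (hypothetical
# names; the real domain uses AgentAction and richer state types): predict
# each other agent's next cell from its announced action, then forbid this
# agent from moving into any of those cells.
DELTAS = {"left": (-1, 0), "right": (1, 0), "up": (0, -1), "down": (0, 1), "stay": (0, 0)}

def predicted_next_cells(positions, actions):
    # positions: {agent: (x, y)}, actions: {agent: action name}
    return {
        (positions[agent][0] + DELTAS[action][0],
         positions[agent][1] + DELTAS[action][1])
        for agent, action in actions.items()
    }

assert predicted_next_cells({"a2": (2, 3)}, {"a2": "left"}) == {(1, 3)}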
def _get_applicable_actions_from(self, state: GridState) -> ListSpace[GridAction]:
    actions = [
        action
        for action in self._actions
        if any(
            self.next_state(state, delta) is not None
            for (delta, _prob) in action._deltas
        )
    ]
    if state in self._terminals:
        actions.append(self._terminals[state])
    return ListSpace(actions)
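# Illustration of the filter above, with hypothetical stand-ins for
# next_state and an action's (delta, probability) pairs: an action stays
# applicable as soon as at least one of its deltas leads to a legal state.
def next_cell(state, delta, obstacles, max_x, max_y):
    x, y = state[0] + delta[0], state[1] + delta[1]
    if 0 <= x <= max_x and 0 <= y <= max_y and (x, y) not in obstacles:
        return (x, y)
    return None

deltas = [((0, 1), 0.9), ((1, 0), 0.1)]  # a noisy "move" action
applicable = any(
    next_cell((0, 0), delta, obstacles={(1, 0)}, max_x=2, max_y=2) is not None
    for delta, _prob in deltas
)
assert applicable  # (0, 1) is legal even though (1, 0) is blocked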
def _get_observation_space_(self) -> ListSpace[GridState]:
    return ListSpace(
        [
            GridState(x, y)
            for x in range(self._minX, self._maxX + 1)
            for y in range(self._minY, self._maxY + 1)
            if GridState(x, y) not in self._obstacles
        ]
    )
def decode(val):
    aa = []
    if val[0]:
        aa.append(Action.up)
    if val[1]:
        aa.append(Action.down)
    if val[2]:
        aa.append(Action.left)
    if val[3]:
        aa.append(Action.right)
    return ListSpace(aa)
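# Self-contained sketch of the decode pattern above (hypothetical Action
# enum; the snippet's ListSpace is replaced by a plain list): each set bit
# of a (up, down, left, right) mask switches the matching move on.
from enum import Enum

class Action(Enum):
    up = 0
    down = 1
    left = 2
    right = 3

def decode_mask(val):
    flags = [Action.up, Action.down, Action.left, Action.right]
    return [action for action, bit in zip(flags, val) if bit]

assert decode_mask([1, 0, 0, 1]) == [Action.up, Action.right]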
def _get_observation_space_(self) -> ListSpace[TrafficLightState]:
    '''Returns the list of states. It can be a superset.'''
    if self._states is None:
        # TODO
        self._states = []
    return ListSpace(self._states)
def _get_applicable_actions_from(self, memory: D.T_memory[D.T_state]) -> D.T_agent[Space[D.T_event]]:
    applicable_actions = []
    if memory.y > 0:
        applicable_actions.append(Action.up)
    if memory.y < self.num_rows - 1:
        applicable_actions.append(Action.down)
    if memory.x > 0:
        applicable_actions.append(Action.left)
    if memory.x < self.num_cols - 1:
        applicable_actions.append(Action.right)
    return ListSpace(applicable_actions)
def _get_applicable_actions_from(
    self, memory: D.T_memory[D.T_state]
) -> D.T_agent[Space[D.T_event]]:
    applicable_actions = []
    if memory.y > 0 and self._maze[memory.y - 1][memory.x] == 1:
        applicable_actions.append(AgentAction.up)
    if memory.y < self._num_rows - 1 and self._maze[memory.y + 1][memory.x] == 1:
        applicable_actions.append(AgentAction.down)
    if memory.x > 0 and self._maze[memory.y][memory.x - 1] == 1:
        applicable_actions.append(AgentAction.left)
    if memory.x < self._num_cols - 1 and self._maze[memory.y][memory.x + 1] == 1:
        applicable_actions.append(AgentAction.right)
    return ListSpace(applicable_actions)
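# Self-contained check of the maze rule above: 1 marks a walkable cell,
# 0 a wall. Hypothetical 3x3 maze; from (x=1, y=1) only in-bounds
# neighbouring cells holding 1 are reachable.
maze = [
    [1, 0, 1],
    [1, 1, 1],
    [0, 1, 0],
]
num_rows, num_cols = len(maze), len(maze[0])
x, y = 1, 1
moves = []
if y > 0 and maze[y - 1][x] == 1:
    moves.append("up")
if y < num_rows - 1 and maze[y + 1][x] == 1:
    moves.append("down")
if x > 0 and maze[y][x - 1] == 1:
    moves.append("left")
if x < num_cols - 1 and maze[y][x + 1] == 1:
    moves.append("right")
assert moves == ["down", "left", "right"]  # maze[0][1] == 0 blocks "up"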
def _get_applicable_actions_from(self, state: TrafficLightState) -> ListSpace[TrafficLightAction]:
    '''Returns the list of actions applicable in the specified state.'''
    result = [TrafficLightAction.DO_NOT_SWITCH]
    # switching is only allowed when exactly one of the two lights is green
    if (state.north_light, state.east_light) in (
        (SingleLightState.RED, SingleLightState.GREEN),
        (SingleLightState.GREEN, SingleLightState.RED),
    ):
        result.append(TrafficLightAction.SWITCH)
    return ListSpace(result)
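# Quick check of the switching rule above, with hypothetical minimal enums
# mirroring the snippet: SWITCH is applicable exactly when one light is
# GREEN and the other RED; DO_NOT_SWITCH is always applicable.
from enum import Enum

class Light(Enum):
    RED = 0
    GREEN = 1

def applicable_traffic_actions(north, east):
    actions = ["DO_NOT_SWITCH"]
    if {north, east} == {Light.RED, Light.GREEN}:
        actions.append("SWITCH")
    return actions

assert applicable_traffic_actions(Light.RED, Light.GREEN) == ["DO_NOT_SWITCH", "SWITCH"]
assert applicable_traffic_actions(Light.RED, Light.RED) == ["DO_NOT_SWITCH"]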
class GymDiscreteActionDomain(UnrestrictedActions):
    """This class wraps an OpenAI Gym environment as a domain usable by a
    solver that requires enumerable applicable action sets.

    !!! warning
        Using this class requires OpenAI Gym to be installed.
    """

    def __init__(
        self, discretization_factor: int = 10, branching_factor: int = None
    ) -> None:
        """Initialize GymDiscreteActionDomain.

        # Parameters
        discretization_factor: Number of discretized action variable values
            per continuous action variable.
        branching_factor: If not None, sample branching_factor actions from
            the resulting list of discretized actions.
        """
        self._discretization_factor = discretization_factor
        self._branching_factor = branching_factor
        self._applicable_actions = self._discretize_action_space(
            self.get_action_space()._gym_space
        )
        if (
            self._branching_factor is not None
            and len(self._applicable_actions.get_elements()) > self._branching_factor
        ):
            self._applicable_actions = ListSpace(
                random.sample(
                    self._applicable_actions.get_elements(), self._branching_factor
                )
            )

    def _get_applicable_actions_from(
        self, memory: D.T_memory[D.T_state]
    ) -> D.T_agent[Space[D.T_event]]:
        return self._applicable_actions

    def _discretize_action_space(
        self, action_space: gym.spaces.Space
    ) -> D.T_agent[Space[D.T_event]]:
        if isinstance(action_space, gym.spaces.box.Box):
            # discretize each box variable independently, then combine
            actions = []
            for l, h in np.nditer([action_space.low, action_space.high]):
                if l == -float("inf") or h == float("inf"):
                    # unbounded variable: fall back to sampling
                    actions.append(
                        [
                            gym.spaces.box.Box(low=l, high=h).sample()
                            for i in range(self._discretization_factor)
                        ]
                    )
                else:
                    # bounded variable: evenly spaced grid from l to h
                    actions.append(
                        [
                            l + ((h - l) / (self._discretization_factor - 1)) * i
                            for i in range(self._discretization_factor)
                        ]
                    )
            alist = []
            self._generate_box_action_combinations(
                actions, action_space.shape, action_space.dtype, 0, [], alist
            )
            return ListSpace(alist)
        elif isinstance(action_space, gym.spaces.discrete.Discrete):
            return ListSpace(list(range(action_space.n)))
        elif isinstance(action_space, gym.spaces.multi_discrete.MultiDiscrete):
            generate = lambda d: (
                [[e] + g for e in range(action_space.nvec[d]) for g in generate(d + 1)]
                if d < len(action_space.nvec) - 1
                else [[e] for e in range(action_space.nvec[d])]
            )
            return ListSpace(generate(0))
        elif isinstance(action_space, gym.spaces.multi_binary.MultiBinary):
            # MultiBinary.n is an int giving the number of binary variables
            generate = lambda d: (
                [[e] + g for e in [True, False] for g in generate(d + 1)]
                if d < action_space.n - 1
                else [[e] for e in [True, False]]
            )
            return ListSpace(generate(0))
        elif isinstance(action_space, gym.spaces.tuple.Tuple):
            generate = lambda d: (
                [
                    [e] + g
                    for e in self._discretize_action_space(
                        action_space.spaces[d]
                    ).get_elements()
                    for g in generate(d + 1)
                ]
                if d < len(action_space.spaces) - 1
                else [
                    [e]
                    for e in self._discretize_action_space(
                        action_space.spaces[d]
                    ).get_elements()
                ]
            )
            return ListSpace(generate(0))
        elif isinstance(action_space, gym.spaces.dict.Dict):
            # materialize the keys so they can be indexed by position
            dkeys = list(action_space.spaces.keys())
            generate = lambda d: (
                [
                    [e] + g
                    for e in self._discretize_action_space(
                        action_space.spaces[dkeys[d]]
                    ).get_elements()
                    for g in generate(d + 1)
                ]
                if d < len(dkeys) - 1
                else [
                    [e]
                    for e in self._discretize_action_space(
                        action_space.spaces[dkeys[d]]
                    ).get_elements()
                ]
            )
            return ListSpace(generate(0))
        else:
            raise RuntimeError(
                "Unknown Gym space element of type " + str(type(action_space))
            )

    def _generate_box_action_combinations(
        self, actions, shape, dtype, index, alist, rlist
    ):
        if index < len(actions):
            # extend the current partial combination with each discretized
            # value of the next box variable
            for a in actions[index]:
                clist = list(alist)
                clist.append(a)
                self._generate_box_action_combinations(
                    actions, shape, dtype, index + 1, clist, rlist
                )
        else:
            # one full combination assembled: pack the chosen values into an
            # array of the action space's shape and dtype
            rlist.append(np.array(alist, dtype=dtype).reshape(shape))
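# Worked illustration of the bounded-Box discretization above (plain numpy,
# independent of the class): for Box(low=0.0, high=1.0, shape=(2,)) and
# discretization_factor=3, each variable is discretized to [0.0, 0.5, 1.0],
# and the combination step enumerates the 3**2 = 9 joint actions.
import itertools
import numpy as np

discretization_factor = 3
low = np.array([0.0, 0.0])
high = np.array([1.0, 1.0])
per_variable = [
    [l + (h - l) / (discretization_factor - 1) * i for i in range(discretization_factor)]
    for l, h in zip(low, high)
]
joint_actions = [np.array(a) for a in itertools.product(*per_variable)]
assert len(joint_actions) == 9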
def _get_observation_space_(self) -> ListSpace[State]:
    return ListSpace(list(self._name_to_state.values()))
def _get_goals_(self) -> D.T_agent[Space[D.T_observation]]:
    # return the space of goal OBSERVATIONS
    return ListSpace([Score(total_bulls=self._n_positions, total_cows=0)])
def _get_action_space_(self) -> ListSpace[TrafficLightAction]:
    '''Returns the list of actions.'''
    return ListSpace([TrafficLightAction.SWITCH, TrafficLightAction.DO_NOT_SWITCH])
def _get_goals_(self) -> D.T_agent[Space[D.T_observation]]:
    return {
        agent: ListSpace([goal]) for agent, goal in self._agents_goals.items()
    }
def _get_action_space_(self) -> D.T_agent[Space[D.T_event]]:
    # return the possible actions (guesses) as an enumerable space
    return ListSpace(self._h_solutions)
def _get_action_space_(self) -> ListSpace[Action]:
    actions = self._actions.copy()
    actions.extend(self._stay_actions)
    return ListSpace(actions)
def _get_goals_(self) -> D.T_agent[Space[D.T_observation]]:
    return ListSpace([State(x=self.num_cols - 1, y=self.num_rows - 1)])
def _get_applicable_actions_from(self, state: State) -> ListSpace[Action]:
    return ListSpace(list(self._state_to_action_to_output[state].keys()))
def _get_goals_(self) -> D.T_agent[Space[D.T_observation]]:
    return ListSpace([self._goal])
def _get_goals_(self) -> Space[D.T_observation]:
    # set the end position as the goal
    return ListSpace([self.end])