Example #1
 def _discretize_action_space(
         self,
         action_space: gym.spaces.Space) -> D.T_agent[Space[D.T_event]]:
     if isinstance(action_space, gym.spaces.box.Box):
         actions = []
         for l, h in np.nditer([action_space.low, action_space.high]):
             if l == -float("inf") or h == float("inf"):
                 actions.append([
                     gym.spaces.box.Box(low=l, high=h).sample()
                     for i in range(self._discretization_factor)
                 ])
             else:
                 actions.append([
                     l + ((h - l) / (self._discretization_factor - 1)) * i
                     for i in range(self._discretization_factor)
                 ])
         alist = []
         self._generate_box_action_combinations(actions, action_space.shape,
                                                action_space.dtype, 0, [],
                                                alist)
         return ListSpace(alist)
     elif isinstance(action_space, gym.spaces.discrete.Discrete):
         return ListSpace(list(range(action_space.n)))
     elif isinstance(action_space, gym.spaces.multi_discrete.MultiDiscrete):
         generate = lambda d: ([[e] + g for e in range(action_space.nvec[d])
                                for g in generate(d + 1)]
                               if d < len(action_space.nvec) - 1 else
                               [[e] for e in range(action_space.nvec[d])])
         return ListSpace(generate(0))
     elif isinstance(action_space, gym.spaces.multi_binary.MultiBinary):
         generate = lambda d: ([[e] + g for e in [True, False]
                                for g in generate(d + 1)]
                               if d < action_space.n - 1 else
                               [[e] for e in [True, False]])
         return ListSpace(generate(0))
     elif isinstance(action_space, gym.spaces.tuple.Tuple):
         generate = lambda d: ([[e] + g
                                for e in self._discretize_action_space(
                                    action_space.spaces[d]).get_elements()
                                for g in generate(d + 1)]
                               if d < len(action_space.spaces) - 1 else
                               [[e] for e in self._discretize_action_space(
                                   action_space.spaces[d]).get_elements()])
         return ListSpace(generate(0))
     elif isinstance(action_space, gym.spaces.dict.Dict):
         dkeys = list(action_space.spaces.keys())
         generate = lambda d: (
             [[e] + g for e in self._discretize_action_space(
                 action_space.spaces[dkeys[d]]).get_elements()
              for g in generate(d + 1)] if d < len(dkeys) - 1 else
             [[e] for e in self._discretize_action_space(
                 action_space.spaces[dkeys[d]]).get_elements()])
         return ListSpace(generate(0))
     else:
         raise RuntimeError("Unknown Gym space element of type " +
                            str(type(action_space)))
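
For intuition, the bounded Box case above places discretization_factor evenly spaced points on each interval [l, h], endpoints included. A minimal sketch (the bounds and factor are hypothetical, not from the source) shows the formula agrees with numpy.linspace:

 import numpy as np

 # Hypothetical example: discretize [0, 1] into k = 5 values using the
 # same formula as above; both endpoints are included.
 l, h, k = 0.0, 1.0, 5
 values = [l + ((h - l) / (k - 1)) * i for i in range(k)]
 print(values)  # [0.0, 0.25, 0.5, 0.75, 1.0]
 assert np.allclose(values, np.linspace(l, h, k))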
Example #2
 def get_agent_applicable_actions(
     self,
     memory: D.T_memory[D.T_state],
     other_agents_actions: D.T_agent[D.T_concurrency[D.T_event]],
     agent: str,
 ) -> Space[D.T_event]:
     if memory[agent] == self._agents_goals[agent]:
         return ListSpace([AgentAction.stay])
     else:
         # compute the cells most likely to be occupied next by the other agents
         occupied_next_cells = set()
         for other_agent, other_agent_action in other_agents_actions.items():
             if other_agent_action == AgentAction.left:
                 occupied_next_cells.add(
                     (memory[other_agent].x - 1, memory[other_agent].y))
             elif other_agent_action == AgentAction.right:
                 occupied_next_cells.add(
                     (memory[other_agent].x + 1, memory[other_agent].y))
             elif other_agent_action == AgentAction.up:
                 occupied_next_cells.add(
                     (memory[other_agent].x, memory[other_agent].y - 1))
             elif other_agent_action == AgentAction.down:
                 occupied_next_cells.add(
                     (memory[other_agent].x, memory[other_agent].y + 1))
             elif other_agent_action == AgentAction.stay:
                 occupied_next_cells.add(
                     (memory[other_agent].x, memory[other_agent].y))
         # now, compute the applicable actions
         applicable_actions = [AgentAction.stay]
         if (memory[agent].y > 0
                 and self._maze[memory[agent].y - 1][memory[agent].x] == 1
                 and (memory[agent].x, memory[agent].y - 1)
                 not in occupied_next_cells):
             applicable_actions.append(AgentAction.up)
         if (memory[agent].y < self._num_rows - 1
                 and self._maze[memory[agent].y + 1][memory[agent].x] == 1
                 and (memory[agent].x, memory[agent].y + 1)
                 not in occupied_next_cells):
             applicable_actions.append(AgentAction.down)
         if (memory[agent].x > 0
                 and self._maze[memory[agent].y][memory[agent].x - 1] == 1
                 and (memory[agent].x - 1, memory[agent].y)
                 not in occupied_next_cells):
             applicable_actions.append(AgentAction.left)
         if (memory[agent].x < self._num_cols - 1
                 and self._maze[memory[agent].y][memory[agent].x + 1] == 1
                 and (memory[agent].x + 1, memory[agent].y)
                 not in occupied_next_cells):
             applicable_actions.append(AgentAction.right)
         return ListSpace(applicable_actions)
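
The collision check in this example reserves each cell that another agent's announced action would move it into. A minimal self-contained sketch of that idea, with simplified stand-ins for the source's types:

 from enum import Enum

 # Hypothetical stand-ins: the real AgentAction and state classes come from
 # the surrounding domain, not from this sketch.
 class AgentAction(Enum):
     up = 0
     down = 1
     left = 2
     right = 3
     stay = 4

 def next_cell(x, y, action):
     # mirrors the mapping above: up decreases y, down increases y
     dx = {AgentAction.left: -1, AgentAction.right: 1}.get(action, 0)
     dy = {AgentAction.up: -1, AgentAction.down: 1}.get(action, 0)
     return (x + dx, y + dy)

 # another agent at (2, 2) announced 'right', so (3, 2) is reserved and any
 # move of ours landing on (3, 2) would be filtered out
 occupied_next_cells = {next_cell(2, 2, AgentAction.right)}
 print((3, 2) in occupied_next_cells)  # True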
Example #3
 def _get_applicable_actions_from(self, state: GridState) -> ListSpace[GridAction]:
     actions = [
         action for action in self._actions
         if any(self.next_state(state, delta) is not None
                for (delta, _prob) in action._deltas)
     ]
     if state in self._terminals:
         actions.append(self._terminals[state])
     return ListSpace(actions)
Example #4
 def _get_observation_space_(self) -> ListSpace[GridState]:
     return ListSpace(
         [
             GridState(x, y) for x in range(self._minX, self._maxX + 1)
             for y in range(self._minY, self._maxY + 1)
             if GridState(x, y) not in self._obstacles
         ]
     )
Example #5
    def __init__(self,
                 discretization_factor: int = 10,
                 branching_factor: Optional[int] = None) -> None:
        """Initialize GymDiscreteActionDomain.

        # Parameters
        discretization_factor: Number of discretized action variable values per continuous action variable
        branching_factor: if not None, sample branching_factor actions from the resulting list of discretized actions
        """
        self._discretization_factor = discretization_factor
        self._branching_factor = branching_factor
        self._applicable_actions = self._discretize_action_space(
            self.get_action_space()._gym_space)
        if (self._branching_factor is not None
                and len(self._applicable_actions.get_elements()) >
                self._branching_factor):
            self._applicable_actions = ListSpace(
                random.sample(self._applicable_actions.get_elements(),
                              self._branching_factor))
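
When the discretized list is larger than branching_factor, the constructor keeps a uniform random subset. A small sketch (with hypothetical stand-in values) of what random.sample does here, sampling without replacement:

 import random

 # Hypothetical stand-in for the discretized action list.
 discretized = list(range(1000))
 branching_factor = 10
 subset = random.sample(discretized, branching_factor)
 print(len(subset), len(set(subset)))  # 10 10 -- distinct actions, no repeats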
Example #6
 def decode(val):
     # decode a length-4 binary vector into the list of applicable actions
     aa = []
     if val[0]:
         aa.append(Action.up)
     if val[1]:
         aa.append(Action.down)
     if val[2]:
         aa.append(Action.left)
     if val[3]:
         aa.append(Action.right)
     return ListSpace(aa)
Example #7

 def _get_observation_space_(self) -> ListSpace[TrafficLightState]:
     '''
     Returns the list of states.
     It can be a superset.
     '''
     if self._states is None:
         # TODO
         self._states = [
         ]

     return ListSpace(self._states)
Example #8

 def _get_applicable_actions_from(self, memory: D.T_memory[D.T_state]) -> D.T_agent[Space[D.T_event]]:
     applicable_actions = []
     if memory.y > 0:
         applicable_actions.append(Action.up)
     if memory.y < self.num_rows - 1:
         applicable_actions.append(Action.down)
     if memory.x > 0:
         applicable_actions.append(Action.left)
     if memory.x < self.num_cols - 1:
         applicable_actions.append(Action.right)
     return ListSpace(applicable_actions)
Example #9
 def _get_applicable_actions_from(
         self,
         memory: D.T_memory[D.T_state]) -> D.T_agent[Space[D.T_event]]:
     applicable_actions = []
     if memory.y > 0 and self._maze[memory.y - 1][memory.x] == 1:
         applicable_actions.append(AgentAction.up)
     if memory.y < self._num_rows - 1 and self._maze[memory.y + 1][memory.x] == 1:
         applicable_actions.append(AgentAction.down)
     if memory.x > 0 and self._maze[memory.y][memory.x - 1] == 1:
         applicable_actions.append(AgentAction.left)
     if memory.x < self._num_cols - 1 and self._maze[memory.y][memory.x + 1] == 1:
         applicable_actions.append(AgentAction.right)
     return ListSpace(applicable_actions)
Example #10

 def _get_applicable_actions_from(self, state: TrafficLightState) -> ListSpace[TrafficLightAction]:
     '''
     Returns the list of actions applicable in the specified state.
     '''
     # DONE
     result = []
     north_light = state.north_light
     east_light = state.east_light
     if north_light == SingleLightState.RED and east_light == SingleLightState.GREEN:
         result.append(TrafficLightAction.DO_NOT_SWITCH)
         result.append(TrafficLightAction.SWITCH)
     elif north_light == SingleLightState.GREEN and east_light == SingleLightState.RED:
         result.append(TrafficLightAction.DO_NOT_SWITCH)
         result.append(TrafficLightAction.SWITCH)
     else:
         result.append(TrafficLightAction.DO_NOT_SWITCH)
     return ListSpace(result)
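
Note that the two symmetric branches above have identical bodies, so the rule reduces to: SWITCH is applicable exactly when one light is green and the other red. A compact equivalent sketch, assuming the lights only ever take the RED and GREEN values used above:

 def applicable(north_light, east_light):
     # DO_NOT_SWITCH is always applicable; SWITCH only when exactly one of
     # the two lights is green (assumes RED/GREEN are the only states)
     result = [TrafficLightAction.DO_NOT_SWITCH]
     one_green = ((north_light == SingleLightState.GREEN)
                  != (east_light == SingleLightState.GREEN))
     if one_green:
         result.append(TrafficLightAction.SWITCH)
     return result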
Example #11

import random
from typing import Optional

import gym
import numpy as np

# ListSpace, Space, D and UnrestrictedActions are assumed to be provided by
# the enclosing library (scikit-decide); the snippet elides those imports.
class GymDiscreteActionDomain(UnrestrictedActions):
    """This class wraps an OpenAI Gym environment as a domain
        usable by a solver that requires enumerable applicable action sets

    !!! warning
        Using this class requires OpenAI Gym to be installed.
    """
    def __init__(self,
                 discretization_factor: int = 10,
                 branching_factor: Optional[int] = None) -> None:
        """Initialize GymDiscreteActionDomain.

        # Parameters
        discretization_factor: Number of discretized action variable values per continuous action variable
        branching_factor: if not None, sample branching_factor actions from the resulting list of discretized actions
        """
        self._discretization_factor = discretization_factor
        self._branching_factor = branching_factor
        self._applicable_actions = self._discretize_action_space(
            self.get_action_space()._gym_space)
        if (self._branching_factor is not None
                and len(self._applicable_actions.get_elements()) >
                self._branching_factor):
            self._applicable_actions = ListSpace(
                random.sample(self._applicable_actions.get_elements(),
                              self._branching_factor))

    def _get_applicable_actions_from(
            self,
            memory: D.T_memory[D.T_state]) -> D.T_agent[Space[D.T_event]]:
        return self._applicable_actions

    def _discretize_action_space(
            self,
            action_space: gym.spaces.Space) -> D.T_agent[Space[D.T_event]]:
        if isinstance(action_space, gym.spaces.box.Box):
            actions = []
            for l, h in np.nditer([action_space.low, action_space.high]):
                if l == -float("inf") or h == float("inf"):
                    actions.append([
                        gym.spaces.box.Box(low=l, high=h).sample()
                        for i in range(self._discretization_factor)
                    ])
                else:
                    actions.append([
                        l + ((h - l) / (self._discretization_factor - 1)) * i
                        for i in range(self._discretization_factor)
                    ])
            alist = []
            self._generate_box_action_combinations(actions, action_space.shape,
                                                   action_space.dtype, 0, [],
                                                   alist)
            return ListSpace(alist)
        elif isinstance(action_space, gym.spaces.discrete.Discrete):
            return ListSpace(list(range(action_space.n)))
        elif isinstance(action_space, gym.spaces.multi_discrete.MultiDiscrete):
            generate = lambda d: ([[e] + g for e in range(action_space.nvec[d])
                                   for g in generate(d + 1)]
                                  if d < len(action_space.nvec) - 1 else
                                  [[e] for e in range(action_space.nvec[d])])
            return ListSpace(generate(0))
        elif isinstance(action_space, gym.spaces.multi_binary.MultiBinary):
            generate = lambda d: ([[e] + g for e in [True, False]
                                   for g in generate(d + 1)]
                                  if d < action_space.n - 1 else
                                  [[e] for e in [True, False]])
            return ListSpace(generate(0))
        elif isinstance(action_space, gym.spaces.tuple.Tuple):
            generate = lambda d: ([[e] + g
                                   for e in self._discretize_action_space(
                                       action_space.spaces[d]).get_elements()
                                   for g in generate(d + 1)]
                                  if d < len(action_space.spaces) - 1 else
                                  [[e] for e in self._discretize_action_space(
                                      action_space.spaces[d]).get_elements()])
            return ListSpace(generate(0))
        elif isinstance(action_space, gym.spaces.dict.Dict):
            dkeys = list(action_space.spaces.keys())
            generate = lambda d: (
                [[e] + g for e in self._discretize_action_space(
                    action_space.spaces[dkeys[d]]).get_elements()
                 for g in generate(d + 1)] if d < len(dkeys) - 1 else
                [[e] for e in self._discretize_action_space(
                    action_space.spaces[dkeys[d]]).get_elements()])
            return ListSpace(generate(0))
        else:
            raise RuntimeError("Unknown Gym space element of type " +
                               str(type(action_space)))

    def _generate_box_action_combinations(self, actions, shape, dtype, index,
                                          alist, rlist):
        if index < len(actions):
            for a in actions[index]:
                clist = list(alist)
                clist.append(a)
                self._generate_box_action_combinations(actions, shape, dtype,
                                                       index + 1, clist, rlist)
        else:
            # fill the action array with this combination, writing each
            # element in place through a writeable nditer view (this also
            # covers the 1-D case)
            ar = np.ndarray(shape=shape, dtype=dtype)
            k = 0
            for i in np.nditer(ar, op_flags=["readwrite"]):
                i[...] = alist[k]
                k += 1
            rlist += [ar]
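
Because the Box discretization enumerates every combination of per-dimension values, the action list grows as discretization_factor raised to the number of action dimensions, which is what makes the branching_factor cap in __init__ useful. A quick back-of-the-envelope sketch with assumed sizes:

 # Hypothetical sizes: 10 values per dimension (the default
 # discretization_factor) over an increasing number of action dimensions.
 discretization_factor = 10
 for dims in (1, 2, 3, 4):
     print(dims, discretization_factor ** dims)  # 10, 100, 1000, 10000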
Example #12
 def _get_observation_space_(self) -> ListSpace[State]:
     return ListSpace(list(self._name_to_state.values()))
Example #13
 def _get_goals_(self) -> D.T_agent[Space[D.T_observation]]:
     # Return the space of goal OBSERVATIONS
     return ListSpace([Score(total_bulls=self._n_positions, total_cows=0)])
Example #14

 def _get_action_space_(self) -> ListSpace[TrafficLightAction]:
     '''
     Returns the list of actions.
     '''
     return ListSpace([TrafficLightAction.SWITCH, TrafficLightAction.DO_NOT_SWITCH])
Example #15
 def _get_goals_(self) -> D.T_agent[Space[D.T_observation]]:
     return {
         agent: ListSpace([goal])
         for agent, goal in self._agents_goals.items()
     }
Example #16
 def _get_action_space_(self) -> D.T_agent[Space[D.T_event]]:
     # Return the possible actions (guesses) as an enumerable space
     return ListSpace(self._h_solutions)
Example #17
 def _get_action_space_(self) -> ListSpace[Action]:
     actions = self._actions.copy()
     actions.extend(self._stay_actions)
     return ListSpace(actions)
Example #18
 def _get_goals_(self) -> D.T_agent[Space[D.T_observation]]:
     return ListSpace([State(x=self.num_cols - 1, y=self.num_rows - 1)])
Example #19
 def _get_applicable_actions_from(self, state: State) -> ListSpace[Action]:
     return ListSpace(list(self._state_to_action_to_output[state].keys()))
Example #20
 def _get_goals_(self) -> D.T_agent[Space[D.T_observation]]:
     return ListSpace([self._goal])
Example #21
 def _get_goals_(self) -> Space[D.T_observation]:
     # Set the end position as goal
     return ListSpace([self.end])