예제 #1
0
    def _next_pos(self, pos, direction):
        """Return the tile reached by taking one step from ``pos``.

        ``direction`` is first translated through ``Moves.get_coord``. Two
        special cases on the row ``self.board['_passage_y']`` jump from
        column 0 to the last column and from the last column to column 0;
        any other move adds the translated offset to ``pos``.

        :param pos: current coordinate (read through ``.x`` and ``.y``)
        :param direction: value accepted by ``Moves.get_coord``
        :return: the destination if it is inside the grid and passable,
                 otherwise the sentinel ``Coord(-1, -1)``
        """
        offset = Moves.get_coord(direction)
        # NOTE(review): ``offset`` is what ``Moves.get_coord`` returned, yet it
        # is compared with ``Moves.EAST`` / ``Moves.WEST`` themselves, and the
        # EAST case wraps from column 0 -- confirm both are intended.
        if (pos.x == 0 and pos.y == self.board['_passage_y']
                and offset == Moves.EAST):
            target = Coord(self.grid.x_size - 1, pos.y)
        elif (pos.x == self.grid.x_size - 1
                and pos.y == self.board['_passage_y']
                and offset == Moves.WEST):
            target = Coord(0, pos.y)
        else:
            target = pos + offset

        # Reject destinations that leave the grid or are not passable.
        if not (self.grid.is_inside(target) and self.is_passable(target)):
            return Coord(-1, -1)
        return target
예제 #2
0
class Compass(Enum):
    """Compass directions, each valued with its ``Coord`` offset.

    The declaration order is significant: ``get_coord`` selects members by
    their position in this list.
    """

    # Straight moves, then the zero offset.
    North = Coord(0, 1)
    East = Coord(1, 0)
    South = Coord(0, -1)
    West = Coord(-1, 0)
    Null = Coord(0, 0)
    # Diagonal moves.
    NorthEast = Coord(1, 1)
    SouthEast = Coord(1, -1)
    SouthWest = Coord(-1, -1)
    NorthWest = Coord(-1, 1)

    @staticmethod
    def get_coord(idx):
        """Return the offset of the member at position ``idx``.

        :param idx: position in declaration order; it is used to index a
                    list of the members, so list indexing rules apply
        :return: the ``value`` of the selected member
        """
        members = list(Compass)
        chosen = members[idx]
        return chosen.value
예제 #3
0
    def _get_init_state(self):
        """Build, store and return a fresh ``PocState``.

        The agent starts at ``self.board["_poc_home"]``. Ghost number ``g``
        is placed at the ghost home shifted by ``g % 2`` in x and ``g // 2``
        in y, with direction ``-1``. One food flag per tile is drawn from a
        binomial distribution (one trial, probability
        ``config["_food_prob"]``).

        Side effect: entries of ``self.board["_maze"]`` that are positive and
        received food are overwritten in place with ``4``.

        :return: the new state, which is also kept in ``self.state``
        """
        state = self.state = PocState()
        state.agent_pos = Coord(*self.board["_poc_home"])
        home = Coord(*self.board["_ghost_home"])

        for ghost_no in range(self.board["_num_ghosts"]):
            row, col = divmod(ghost_no, 2)
            start = Coord(home.x + col, home.y + row)
            state.ghosts.append(Ghost(start, direction=-1))

        state.food_pos = np.random.binomial(
            1, config["_food_prob"], size=self.grid.n_tiles)

        # Only positive maze cells (the original comment calls them free
        # space) are marked as food, using the value 4.
        # NOTE(review): ``food_pos`` itself is not masked, and the maze is
        # modified in place -- confirm both are intended.
        maze = self.board["_maze"]
        open_cells = maze > 0
        sampled_food = state.food_pos.reshape(maze.shape) > 0
        maze[open_cells & sampled_food] = 4

        state.power_step = 0
        return state
예제 #4
0
 def _smell_food(self, smell_range=1):
     """Report whether any food lies within ``smell_range`` of the agent.

     Scans every offset in ``[-smell_range, smell_range]`` on both axes
     around ``self.state.agent_pos`` (the agent's own tile included).

     :param smell_range: half-width of the scanned square, in tiles
     :return: True as soon as a scanned tile inside the grid has a truthy
              entry in ``self.state.food_pos``, otherwise False
     """
     offsets = range(-smell_range, smell_range + 1)
     for x in offsets:
         for y in offsets:
             target = self.state.agent_pos + Coord(x, y)
             # Check bounds first so that no index is ever computed for a
             # coordinate that lies outside the grid.
             if not self.grid.is_inside(target):
                 continue
             if self.state.food_pos[self.grid.get_index(target)]:
                 return True
     return False
예제 #5
0
 def _decode_state(self, state):
     """Rebuild a ``PocState`` from its flat encoded form.

     Layout as read by this method: element 0 is the agent's tile index,
     the slice up to ``num_ghosts * 3`` holds ghost data, the last element
     is the power step and everything in between is the food vector.

     :param state: flat sequence supporting slicing and ``.tolist()``
                   (presumably a numpy array -- TODO confirm)
     :return: the decoded ``PocState``
     """
     decoded = PocState(Coord(*self.grid.get_coord(state[0])))
     ghost_end = self.board["_num_ghosts"] * 3

     # NOTE(review): splitting into a single section yields exactly one
     # chunk, so only one ghost is appended -- confirm against the encoder.
     for chunk in np.split(state[1:ghost_end], 1):
         ghost = Ghost(pos=self.grid.get_coord(chunk[0]), direction=chunk[1])
         decoded.ghosts.append(ghost)

     decoded.power_step = state[-1]
     decoded.food_pos = state[ghost_end:-1].tolist()
     return decoded
예제 #6
0
    def _get_init_state(self):
        """Create, store and return the initial ``PocState``.

        The agent starts at ``self.board["_poc_home"]``. Ghost number ``g``
        is placed at the ghost home shifted by ``g % 2`` in x and ``g // 2``
        in y, with direction ``-1``. Food flags are drawn from a binomial
        distribution (one trial, probability ``config["_food_prob"]``).

        :return: the new state, which is also kept in ``self.state``
        """
        # Walls are not created here; an earlier draft that filled the grid
        # from config["maze"] is not executed.
        state = self.state = PocState()
        state.agent_pos = Coord(*self.board["_poc_home"])
        home = Coord(*self.board["_ghost_home"])

        for ghost_no in range(self.board["_num_ghosts"]):
            row, col = divmod(ghost_no, 2)
            start = Coord(home.x + col, home.y + row)
            state.ghosts.append(Ghost(start, direction=-1))

        # NOTE(review): one more flag than there are tiles is sampled --
        # confirm the extra slot is intentional.
        n_flags = self.grid.n_tiles + 1
        state.food_pos = np.random.binomial(
            1, config["_food_prob"], size=n_flags)

        state.power_step = 0
        return state
예제 #7
0
파일: rock.py 프로젝트: muthissar/gym_pomdp
    def __init__(self, board_size=7, num_rocks=8, use_heuristic=False):
        """Set up the grid, rock positions, spaces and reward constants.

        :param board_size: key into ``config``; also used as both grid
                           dimensions
        :param num_rocks: must be contained in
                          ``config[board_size]['size']``
        :param use_heuristic: stored as ``self._use_heuristic``
        :raises AssertionError: if the size/rock combination is not
                                configured
        """
        known_sizes = list(config.keys())
        assert board_size in known_sizes and \
            num_rocks in config[board_size]['size']

        self.num_rocks = num_rocks
        self._use_heuristic = use_heuristic

        layout = config[board_size]
        self._rock_pos = [Coord(*raw) for raw in layout['rock_pos']]
        self._agent_pos = Coord(*layout['init_pos'])
        self.grid = Grid(board_size, board_size)

        # Store each rock's list position on the board cell it occupies.
        for rock_no, rock_coord in enumerate(self._rock_pos):
            self.grid.board[rock_coord] = rock_no

        # One action per ``Action`` member plus one per rock.
        self.action_space = Discrete(len(Action) + self.num_rocks)
        self.observation_space = Discrete(len(Obs))

        self._discount = .95
        self._reward_range = 20
        self._penalization = -100
        self._query = 0
예제 #8
0
파일: rock.py 프로젝트: muthissar/gym_pomdp
    def _decode_state(self, state, as_array=False):
        """Rebuild a ``RockState`` from its dictionary form.

        :param state: mapping with ``'agent_pos'`` (unpacked into ``Coord``)
                      and ``'rocks'`` (iterable of attribute mappings)
        :param as_array: when truthy, return a flat numpy array instead of
                         the state object
        :return: the ``RockState``; or, with ``as_array``, the agent's grid
                 index followed by the ``status`` of every rock
        """
        position = Coord(*state['agent_pos'])
        decoded = RockState(position)
        for attrs in state['rocks']:
            # Start from a rock built with pos=0, then copy the stored
            # attributes straight onto the instance.
            rock = Rock(pos=0)
            rock.__dict__.update(attrs)
            decoded.rocks.append(rock)

        if not as_array:
            return decoded

        statuses = [rock.status for rock in decoded.rocks]
        return np.concatenate([[self.grid.get_index(position)], statuses])
예제 #9
0
파일: rock.py 프로젝트: muthissar/gym_pomdp
 def __dict2np__(self, state):
     """Flatten a dictionary state into a numpy array.

     :param state: mapping with ``'agent_pos'`` (unpacked into ``Coord``)
                   and ``'rocks'`` (iterable of mappings with ``'status'``)
     :return: array holding the agent's grid index followed by the status
              of every rock
     """
     agent_idx = self.grid.get_index(Coord(*state['agent_pos']))
     statuses = [entry['status'] for entry in state['rocks']]
     return np.concatenate([[agent_idx], statuses])
예제 #10
0
 def get_tag_coord(self, idx):
     """Convert a tile index into a ``Coord`` on a two-part layout.

     Indices 0-19 fill rows of ten columns starting at the origin; the
     remaining indices fill rows of three columns shifted by 5 in x and
     2 in y.

     :param idx: tile index, must satisfy ``0 <= idx < self.n_tiles``
     :return: the matching ``Coord``
     """
     assert 0 <= idx < self.n_tiles
     if idx < 20:
         row, col = divmod(idx, 10)
         return Coord(col, row)
     row, col = divmod(idx - 20, 3)
     return Coord(col + 5, row + 2)
예제 #11
0
파일: gui.py 프로젝트: muthissar/gym_pomdp
 def get_coord(self, idx):
     """Convert a row-major tile index into a ``Coord``.

     :param idx: tile index, must satisfy ``0 <= idx < self.n_tiles``
     :return: ``Coord`` with x = idx modulo ``self.x_size`` and
              y = idx floor-divided by ``self.x_size``
     """
     assert 0 <= idx < self.n_tiles
     row, col = divmod(idx, self.x_size)
     return Coord(col, row)
예제 #12
0
    def __init__(self,
                 board_size=7,
                 num_rocks=8,
                 use_heuristic=False,
                 observation='o',
                 stay_inside=False):
        """Set up the grid, rocks, action/observation spaces and constants.

        :param board_size: int, the board is a board_size x board_size
                           square; must be a key of ``config``
        :param num_rocks: int, number of rocks on the board; must equal the
                          number of rock positions configured for this size
        :param use_heuristic: bool, stored as ``self._use_heuristic``
        :param observation: str, must be one of
                                'o': observed value only
                                'oa': observed value + the action taken
                                'po': position of the agent + observed value
                                'poa': position + observed value + action
        :param stay_inside: bool, when true the out-of-bounds penalty is 0,
                            otherwise it equals the general penalization
        :raises AssertionError: on an unconfigured board/rock combination or
                                an unknown ``observation`` mode
        """

        assert board_size in config and \
            num_rocks == len(config[board_size]["rock_pos"])

        self.num_rocks = num_rocks
        self._use_heuristic = use_heuristic

        self._rock_pos = \
            [Coord(*rock) for rock in config[board_size]['rock_pos']]
        self._agent_pos = Coord(*config[board_size]['init_pos'])
        self.grid = Grid(board_size, board_size)

        # Store each rock's list position on the board cell it occupies.
        for idx, rock in enumerate(self._rock_pos):
            self.grid.board[rock] = idx

        # One action per ``Action`` member plus one per rock.
        self.action_space = Discrete(len(Action) + self.num_rocks)
        self._discount = .95
        self._reward_range = 20
        self._penalization = -100
        self._query = 0
        self._out_of_bounds_penalty = 0 if stay_inside else self._penalization

        self.state = None
        self.last_action = None
        self.done = False

        self.gui = None

        # The builtin ``int`` is used as the Box dtype: ``np.int`` was only
        # an alias for it and has been removed from current NumPy releases.
        assert observation in ['o', 'oa', 'po', 'poa']
        if observation == 'o':
            self._make_obs = lambda obs, a: obs
            self.observation_space = Discrete(len(Obs))
        elif observation == 'oa':
            self._make_obs = self._oa
            self.observation_space = \
                Box(low=0,
                    high=np.append(max(Obs), np.ones(self.action_space.n)),
                    dtype=int)

        elif observation == 'po':
            self._make_obs = self._po
            self.observation_space = \
                Box(low=0,
                    high=np.append(np.ones(self.grid.n_tiles), max(Obs)),
                    dtype=int)

        elif observation == 'poa':
            self._make_obs = self._poa
            self.observation_space = \
                Box(low=0,
                    high=np.concatenate((np.ones(self.grid.n_tiles),
                                         [max(Obs)],
                                         np.ones(self.action_space.n))),
                    dtype=int)