def _next_pos(self, pos, direction):
    direction = Moves.get_coord(direction)
    # The passage row wraps around the maze: stepping off the west edge
    # re-enters on the east edge, and vice versa.
    if pos.x == 0 and pos.y == self.board['_passage_y'] and \
            direction == Moves.WEST:
        next_pos = Coord(self.grid.x_size - 1, pos.y)
    elif pos.x == self.grid.x_size - 1 and \
            pos.y == self.board['_passage_y'] and direction == Moves.EAST:
        next_pos = Coord(0, pos.y)
    else:
        next_pos = pos + direction
    if self.grid.is_inside(next_pos) and self.is_passable(next_pos):
        return next_pos
    # Coord(-1, -1) signals a blocked or invalid move.
    return Coord(-1, -1)
from enum import Enum


class Compass(Enum):
    North = Coord(0, 1)
    East = Coord(1, 0)
    South = Coord(0, -1)
    West = Coord(-1, 0)
    Null = Coord(0, 0)
    NorthEast = Coord(1, 1)
    SouthEast = Coord(1, -1)
    SouthWest = Coord(-1, -1)
    NorthWest = Coord(-1, 1)

    @staticmethod
    def get_coord(idx):
        # Map an action index to its unit step; indices follow definition order.
        return list(Compass)[idx].value
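A minimal usage sketch, assuming Coord implements element-wise addition as the other snippets rely on; the index-to-direction mapping simply follows the enum's definition order:

step = Compass.get_coord(1)      # Coord(1, 0), i.e. East
new_pos = Coord(3, 4) + step     # one tile east of (3, 4)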
def _get_init_state(self):
    self.state = PocState()
    self.state.agent_pos = Coord(*self.board["_poc_home"])
    ghost_home = Coord(*self.board["_ghost_home"])
    # Place the ghosts in a 2-wide block starting at the ghost home.
    for g in range(self.board["_num_ghosts"]):
        pos = Coord(ghost_home.x + g % 2, ghost_home.y + g // 2)
        self.state.ghosts.append(Ghost(pos, direction=-1))
    # Sample one Bernoulli food flag per tile, then keep food only on free space.
    self.state.food_pos = np.random.binomial(1, config["_food_prob"],
                                             size=self.grid.n_tiles)
    idx = (self.board["_maze"] > 0) & \
          (self.state.food_pos.reshape(self.board["_maze"].shape) > 0)
    self.board["_maze"][idx] = 4
    self.state.power_step = 0
    return self.state
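The food layout is a Bernoulli mask over the tiles. A standalone sketch of the same masking step on a toy 3x3 maze (the maze values are made up; only the numpy calls mirror the method above):

import numpy as np

maze = np.array([[0, 1, 0],
                 [1, 1, 1],
                 [0, 1, 0]])                          # 0 = wall, 1 = free tile
food = np.random.binomial(1, 0.5, size=maze.size)    # one food flag per tile
idx = (maze > 0) & (food.reshape(maze.shape) > 0)    # food only on free tiles
maze[idx] = 4                                        # mark food tiles, as in _get_init_state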
def _smell_food(self, smell_range=1):
    # True if any food lies within `smell_range` tiles of the agent.
    for x in range(-smell_range, smell_range + 1):
        for y in range(-smell_range, smell_range + 1):
            smell_pos = self.state.agent_pos + Coord(x, y)
            # Only index food_pos for positions that are actually on the grid.
            if not self.grid.is_inside(smell_pos):
                continue
            if self.state.food_pos[self.grid.get_index(smell_pos)]:
                return True
    return False
def _decode_state(self, state):
    # Assumed flat layout: [agent tile idx,
    #                       (tile idx, direction) per ghost,
    #                       one food flag per tile, power_step].
    poc_state = PocState(self.grid.get_coord(state[0]))
    num_ghosts = self.board["_num_ghosts"]
    # One chunk per ghost, rather than a single chunk for all of them.
    ghosts = np.split(state[1:1 + num_ghosts * 2], num_ghosts)
    for g in ghosts:
        poc_state.ghosts.append(
            Ghost(pos=self.grid.get_coord(g[0]), direction=g[1]))
    poc_state.food_pos = state[1 + num_ghosts * 2:-1].tolist()
    poc_state.power_step = state[-1]
    return poc_state
def _get_init_state(self):
    # create walls
    # for tile in self.grid:
    #     value = config["maze"][tile.key[0]]
    #     self.grid.set_value(value, coord=tile.key)
    self.state = PocState()
    self.state.agent_pos = Coord(*self.board["_poc_home"])
    ghost_home = Coord(*self.board["_ghost_home"])
    for g in range(self.board["_num_ghosts"]):
        pos = Coord(ghost_home.x + g % 2, ghost_home.y + g // 2)
        self.state.ghosts.append(Ghost(pos, direction=-1))
    # One food flag per tile, indexed by the grid's flat tile index.
    self.state.food_pos = np.random.binomial(1, config["_food_prob"],
                                             size=self.grid.n_tiles)
    self.state.power_step = 0
    return self.state
def __init__(self, board_size=7, num_rocks=8, use_heuristic=False):
    assert board_size in list(config.keys()) and \
        num_rocks in config[board_size]['size']
    self.num_rocks = num_rocks
    self._use_heuristic = use_heuristic
    # Rock positions and the initial agent position are fixed by the config.
    self._rock_pos = [
        Coord(*rock) for rock in config[board_size]['rock_pos']
    ]
    self._agent_pos = Coord(*config[board_size]['init_pos'])
    self.grid = Grid(board_size, board_size)
    for idx, rock in enumerate(self._rock_pos):
        self.grid.board[rock] = idx
    # One primitive action per entry in Action, plus one rock-specific action per rock.
    self.action_space = Discrete(len(Action) + self.num_rocks)
    self.observation_space = Discrete(len(Obs))
    self._discount = .95
    self._reward_range = 20
    self._penalization = -100
    self._query = 0
def _decode_state(self, state, as_array=False):
    agent_pos = Coord(*state['agent_pos'])
    rock_state = RockState(agent_pos)
    for r in state['rocks']:
        rock = Rock(pos=0)
        rock.__dict__.update(r)
        rock_state.rocks.append(rock)
    if as_array:
        rocks = [rock.status for rock in rock_state.rocks]
        return np.concatenate([[self.grid.get_index(agent_pos)], rocks])
    return rock_state
def __dict2np__(self, state):
    # Flatten a dict state into [agent tile index, rock statuses...].
    idx = self.grid.get_index(Coord(*state['agent_pos']))
    rocks = [rock['status'] for rock in state['rocks']]
    return np.concatenate([[idx], rocks])
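A hypothetical input for __dict2np__, matching only the keys the method actually reads ('agent_pos' and each rock's 'status'); the resulting first entry depends on how grid.get_index flattens coordinates:

state = {
    'agent_pos': (3, 2),                       # made-up position
    'rocks': [{'status': 1}, {'status': 0}],   # made-up rock statuses
}
# __dict2np__(state) -> array([get_index(Coord(3, 2)), 1, 0])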
def get_tag_coord(self, idx):
    assert 0 <= idx < self.n_tiles
    # Tag's grid is a 10x2 corridor (indices 0-19) with a 3x3 block
    # on top of it at x = 5..7, y = 2..4.
    if idx < 20:
        return Coord(idx % 10, idx // 10)
    idx -= 20
    return Coord(idx % 3 + 5, idx // 3 + 2)
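A few spot checks of the mapping, purely for illustration (29 tiles in total):

# get_tag_coord(0)  -> Coord(0, 0)    # bottom-left of the corridor
# get_tag_coord(19) -> Coord(9, 1)    # top-right of the corridor
# get_tag_coord(20) -> Coord(5, 2)    # first cell of the 3x3 block
# get_tag_coord(28) -> Coord(7, 4)    # last cell, so n_tiles == 29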
def get_coord(self, idx):
    assert 0 <= idx < self.n_tiles
    return Coord(idx % self.x_size, idx // self.x_size)
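get_coord inverts row-major flat indexing, so the matching get_index(coord) is presumably y * x_size + x; a self-contained round trip under that assumption:

x_size = 5                                 # hypothetical 5-column grid
for idx in range(20):                      # pretend n_tiles == 20
    x, y = idx % x_size, idx // x_size     # what get_coord computes
    assert y * x_size + x == idx           # the assumed get_index inverse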
def __init__(self, board_size=7, num_rocks=8, use_heuristic=False,
             observation='o', stay_inside=False):
    """
    :param board_size: int, the board is a square of board_size x board_size
    :param num_rocks: int, number of rocks on the board
    :param use_heuristic: bool, usage unclear
    :param observation: str, must be one of
        'o':   observed value only
        'oa':  the above + the action taken
        'po':  position of the agent + the observed value
        'poa': the above + the action taken
    """
    assert board_size in list(config.keys()) and \
        num_rocks == len(config[board_size]["rock_pos"])
    self.num_rocks = num_rocks
    self._use_heuristic = use_heuristic
    self._rock_pos = \
        [Coord(*rock) for rock in config[board_size]['rock_pos']]
    self._agent_pos = Coord(*config[board_size]['init_pos'])
    self.grid = Grid(board_size, board_size)
    for idx, rock in enumerate(self._rock_pos):
        self.grid.board[rock] = idx
    self.action_space = Discrete(len(Action) + self.num_rocks)
    self._discount = .95
    self._reward_range = 20
    self._penalization = -100
    self._query = 0
    # Leaving the board is either free or penalized like any illegal move.
    if stay_inside:
        self._out_of_bounds_penalty = 0
    else:
        self._out_of_bounds_penalty = self._penalization
    self.state = None
    self.last_action = None
    self.done = False
    self.gui = None
    # The observation space depends on how much context is exposed to the agent.
    assert observation in ['o', 'oa', 'po', 'poa']
    if observation == 'o':
        self._make_obs = lambda obs, a: obs
        self.observation_space = Discrete(len(Obs))
    elif observation == 'oa':
        self._make_obs = self._oa
        self.observation_space = \
            Box(low=0,
                high=np.append(max(Obs), np.ones(self.action_space.n)),
                dtype=np.int64)
    elif observation == 'po':
        self._make_obs = self._po
        self.observation_space = \
            Box(low=0,
                high=np.append(np.ones(self.grid.n_tiles), max(Obs)),
                dtype=np.int64)
    elif observation == 'poa':
        self._make_obs = self._poa
        self.observation_space = \
            Box(low=0,
                high=np.concatenate((np.ones(self.grid.n_tiles),
                                     [max(Obs)],
                                     np.ones(self.action_space.n))),
                dtype=np.int64)
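For the vector observation modes, the Box bounds above fix the length of the flat observation. The implied sizes, with n_tiles = board_size * board_size and n_actions = len(Action) + num_rocks (what each unit-bounded slot holds is an assumption, since _oa, _po, and _poa are not shown here):

# 'o'   -> a single Discrete(len(Obs)) value
# 'oa'  -> 1 + n_actions entries           (observed value, then action slots bounded by 1)
# 'po'  -> n_tiles + 1 entries             (agent-position slots, then the observed value)
# 'poa' -> n_tiles + 1 + n_actions entries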