def _make_action(self, rid, action):
    """Apply one movement action to a single agent and advance its state.

    :param rid: id of the agent
    :param action: action for the agent — a ``Direction`` member or its
        integer value (0-north/up, 1-east/right, 2-south/down, 3-west/left)
    :return: ``StepResponse(state_prime, reward, done)`` where
        state_prime: new state for the agent
        reward: reward for the agent
        done: agent terminated or not
    """
    agent = self.agents[rid]
    pos = agent.pos

    # Translate the action into a candidate position. Accept either the
    # enum member or its raw value, mirroring how callers pass actions.
    new_pos = Position(-1, -1)
    if action == Direction.UP or action == Direction.UP.value:
        new_pos = pos.up()
    elif action == Direction.RIGHT or action == Direction.RIGHT.value:
        new_pos = pos.right()
    elif action == Direction.DOWN or action == Direction.DOWN.value:
        new_pos = pos.down()
    elif action == Direction.LEFT or action == Direction.LEFT.value:
        new_pos = pos.left()
    else:
        # Unknown action: keep the sentinel (-1, -1), which the bounds
        # check below treats as a fatal (dead) move.
        print("Invalid direction: %s" % (action))

    agent._set_pos(new_pos)

    # Validate the move with explicit checks rather than `assert`:
    # asserts are stripped under `python -O`, which would silently
    # disable out-of-bounds / death detection.
    out_of_bounds = (
        not (new_pos >= Position(0, 0))                        # below map minimum
        or not (new_pos < Position(self.height, self.width))   # beyond map maximum
        or self.grid[new_pos.asTuple()] == 0                   # internal obstacle cell
    )
    if out_of_bounds:
        self.dead = True
        # Preserve original evaluation order on the death path:
        # reward first, then state. Episode always terminates here.
        reward = self._get_reward(new_pos)
        state_prime = agent._get_state(new_pos)
        return StepResponse(state_prime, reward, True)

    state_prime = agent._get_state(new_pos)
    reward = self._get_reward(new_pos)
    done = self._get_terminate(new_pos)
    return StepResponse(state_prime, reward, done)
def __init__(self, height=5, width=5, num_agents=1, start=Position(0, 0),
             goal=Position(9E9, 9E9), view_range=1, horizon=2,
             goal_reward=1.0, pos_hist_len=5, render=False, std=False):
    """Construct the grid environment.

    :param height: number of rows in the grid
    :param width: number of columns in the grid
    :param num_agents: number of agents to spawn at ``start``
    :param start: starting position (``Position``, or a ``(y, x)`` tuple
        when ``std=True``)
    :param goal: goal position; values outside the map are clamped to the
        bottom-right cell (the default ``Position(9E9, 9E9)`` relies on this)
    :param view_range: agent observation range
    :param horizon: horizon passed to ``RewardMap``
    :param goal_reward: reward granted at the goal
    :param pos_hist_len: length of each agent's position history
    :param render: if True, render once at construction
    :param std: if True, ``start``/``goal`` are ``(y, x)`` tuples and are
        converted to ``Position`` here
    """
    self.std = std
    self.width = width
    self.height = height
    self.num_agents = num_agents
    self.view_range = view_range
    self.dead = False
    self.pos_hist_len = pos_hist_len

    # In "std" mode the caller supplies (y, x) tuples; normalize them.
    if self.std:
        start = Position(y=start[0], x=start[1])
        goal = Position(y=goal[0], x=goal[1])

    # Clamp an off-map goal to the bottom-right cell. Note `>=`: valid
    # indices run 0..width-1 / 0..height-1, so a goal exactly at
    # width/height would crash the grid assignment below.
    if goal.x >= self.width or goal.y >= self.height:
        goal = Position(self.height - 1, self.width - 1)

    # Store the normalized/clamped values so every consumer (grid,
    # agents, RewardMap) agrees on the same start and goal.
    self.start = start
    self.goal = goal

    # self.grid stores the state of each cell of the map:
    # 0-obstacle, 1-walkable, 2-goal, 3-agent
    self.grid = np.ones((height, width), dtype=int)
    self.grid[goal.asTuple()] = 2

    self.agents = {}
    for n in range(self.num_agents):
        self.agents[n] = Agent(self, pos_hist_len=self.pos_hist_len,
                               pos_start=self.start)

    self.reward_map = RewardMap(self.goal, self.height, self.width,
                                horizon, goal_reward)
    self.reward_death = -100.0

    self.renderEnv = render
    self.called_render = False
    if self.renderEnv:
        self._render()