def _make_action(self, rid, action):
        """Move agent ``rid`` one cell in the given direction and score the move.

        :param rid: id of the agent
        :param action: direction for the agent; a ``Direction`` member or its
            raw value (0-north/up, 1-east/right, 2-south/down, 3-west/left)
        :return: ``StepResponse`` holding:
            state_prime: new state for the agent
            reward: reward for the agent
            done: agent terminated or not
        """
        agent = self.agents[rid]
        pos = agent.pos

        # Map the action (enum member or raw int) to a candidate position.
        if action == Direction.UP or action == Direction.UP.value:
            new_pos = pos.up()
        elif action == Direction.RIGHT or action == Direction.RIGHT.value:
            new_pos = pos.right()
        elif action == Direction.DOWN or action == Direction.DOWN.value:
            new_pos = pos.down()
        elif action == Direction.LEFT or action == Direction.LEFT.value:
            new_pos = pos.left()
        else:
            print("Invalid direction: %s" % (action))
            # Sentinel position; falls through the bounds check below as dead.
            new_pos = Position(-1, -1)

        agent._set_pos(new_pos)

        # Explicit checks instead of `assert`, which `python -O` strips —
        # the original would silently skip out-of-bounds detection under -O.
        # `or` short-circuits, so the grid lookup only runs once the position
        # is known to be inside the map (same safety the try/except gave).
        out_of_bounds = (
            not (new_pos >= Position(0, 0))  # outside map (below min)
            or not (new_pos < Position(self.height, self.width))  # above max
            or self.grid[new_pos.asTuple()] == 0  # internal edge / obstacle
        )
        if out_of_bounds:
            self.dead = True
            reward = self._get_reward(new_pos)
            state_prime = agent._get_state(new_pos)
            return StepResponse(state_prime, reward, True)

        state_prime = agent._get_state(new_pos)
        reward = self._get_reward(new_pos)
        done = self._get_terminate(new_pos)

        return StepResponse(state_prime, reward, done)
    def __init__(self,
                 height=5,
                 width=5,
                 num_agents=1,
                 start=Position(0, 0),
                 goal=Position(9E9, 9E9),
                 view_range=1,
                 horizon=2,
                 goal_reward=1.0,
                 pos_hist_len=5,
                 render=False,
                 std=False):
        """Build a ``height`` x ``width`` grid world with ``num_agents`` agents.

        :param height: number of rows in the grid
        :param width: number of columns in the grid
        :param num_agents: how many agents to spawn, all starting at ``start``
        :param start: starting position; a ``Position``, or a (y, x) pair when
            ``std`` is True
        :param goal: goal position; clamped to the bottom-right cell when it
            lies outside the grid (the huge default forces that clamp)
        :param view_range: how far each agent can see
        :param horizon: reward horizon forwarded to ``RewardMap``
        :param goal_reward: reward granted at the goal cell
        :param pos_hist_len: length of each agent's position history
        :param render: if True, render the environment immediately
        :param std: if True, ``start`` and ``goal`` are (y, x) pairs rather
            than ``Position`` instances
        """
        self.std = std

        self.width = width
        self.height = height
        self.num_agents = num_agents

        self.view_range = view_range
        self.dead = False
        self.pos_hist_len = pos_hist_len

        if self.std:
            start = Position(y=start[0], x=start[1])
            goal = Position(y=goal[0], x=goal[1])

        # Clamp an out-of-range goal to the bottom-right cell. Compare with
        # >= because valid indices run 0 .. size-1: the original `>` let
        # goal == size through, and the grid write below would then raise.
        if goal.x >= self.width or goal.y >= self.height:
            goal = Position(self.height - 1, self.width - 1)

        # Store the converted/clamped values so agents, the grid and the
        # reward map all agree on one start/goal. (Previously the raw
        # arguments were stored before conversion, letting self.goal diverge
        # from the cell actually marked in the grid.)
        self.start = start
        self.goal = goal

        # self.grid stores state of each grid of the map
        # 0-obstacle, 1-walkable, 2-goal, 3-agent
        self.grid = np.ones((height, width), dtype=int)
        self.grid[goal.asTuple()] = 2

        self.agents = {}
        for n in range(self.num_agents):
            self.agents[n] = Agent(self,
                                   pos_hist_len=self.pos_hist_len,
                                   pos_start=self.start)

        self.reward_map = RewardMap(self.goal, self.height, self.width,
                                    horizon, goal_reward)

        # Penalty applied when an agent dies (read elsewhere in the class).
        self.reward_death = -100.0

        self.renderEnv = render
        self.called_render = False

        if self.renderEnv:
            self._render()