Code example #1
File: agent.py  Project: treszkai/rl-cw1
def __init__(self):
    self._ale = ALEInterface()
    self._ale.setInt('random_seed', 123)
    self._ale.setFloat('repeat_action_probability', 0.0)
    self._ale.setBool('color_averaging', False)
    self._ale.loadROM('roms/enduro.bin')
    self._controller = Controller(self._ale)
    self._extractor = StateExtractor(self._ale)
    self._image = None
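
For context, the fragment above omits its imports. A hedged sketch of what they most likely are (ALEInterface comes from the classic ALE Python bindings; the enduro.* module paths are assumptions inferred from the identifiers, not shown in the excerpt):

from ale_python_interface import ALEInterface  # ALE Python bindings (assumed)

from enduro.action import Action               # assumed project-local module
from enduro.control import Controller          # assumed project-local module
from enduro.state import StateExtractor        # assumed project-local module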
Code example #2
File: agent.py  Project: treszkai/rl-cw1
class Agent(object):
    def __init__(self):
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn (bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for e in range(episodes):
            # Observe the environment to set the initial state
            (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
            self.initialise(grid)

            num_frames = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act()

                # Update the environment grid
                (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
                self.sense(grid)

                # Perform learning if required
                if learn:
                    self.learn()

                self.callback(learn, e + 1,
                              self._ale.getFrameNumber() - num_frames)
            self._ale.reset_game()

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BRAKE]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should be executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """
        return self._controller.move(action)

    def initialise(self, grid):
        """ Called at the beginning of each episode, mainly used
        for state initialisation.

        Args:
            grid (np.ndarray): 11x10 array with the initial environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def act(self):
        """ Called at each loop iteration to choose and execute an action.

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, grid):
        """ Called at each loop iteration to construct the new state from
        the updated environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """

        raise NotImplementedError
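
All five behaviour hooks (initialise, act, sense, learn, callback) raise NotImplementedError, so this class is meant to be subclassed. A minimal sketch of a concrete agent wired up against this interface (RandomAgent and its reporting interval are illustrative, not part of the project):

import random

class RandomAgent(Agent):
    def initialise(self, grid):
        # Nothing to set up for a random policy; just track reward
        self.total_reward = 0

    def act(self):
        # Pick any permitted action and execute it via move()
        action = random.choice(self.getActionsSet())
        self.total_reward += self.move(action)

    def sense(self, grid):
        # A random policy ignores the observed grid
        pass

    def learn(self):
        # Nothing to learn
        pass

    def callback(self, learn, episode, iteration):
        # Report progress every 650 frames
        if iteration % 650 == 0:
            print('episode %d, frame %d, total reward %d'
                  % (episode, iteration, self.total_reward))

agent = RandomAgent()
agent.run(learn=False, episodes=2)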
Code example #3
File: agent.py  Project: jameswilsenach/RL_Enduro
class Agent(object):
    def __init__(self):
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None
        self._speed_range = 50

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn (bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for e in range(episodes):
            self._relative_speed = -self._speed_range

            # Observe the environment to set the initial state
            (road, cars, grid, self._image) = self._extractor.run(draw=draw,
                                                                  scale=4.0)
            self.initialise(road, cars, self._relative_speed, grid)

            num_frames = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act()

                # Update the environment grid
                (road, cars, grid,
                 self._image) = self._extractor.run(draw=draw, scale=4.0)

                if self.collision(cars):
                    self._relative_speed = -self._speed_range

                self.sense(road, cars, self._relative_speed, grid)

                # Perform learning if required
                if learn:
                    self.learn()

                self.callback(learn, e + 1,
                              self._ale.getFrameNumber() - num_frames)
            self._ale.reset_game()

    def collision(self, cars):
        """ Returns True when the nearest opponent is close enough, and
        roughly ahead of the agent, to count as a crash.
        """
        if not cars['others']:
            return False

        x, y, _, _ = cars['self']

        # Track the nearest opponent and the angle towards it
        min_dist = sys.float_info.max
        min_angle = 0.

        for c in cars['others']:
            cx, cy, _, _ = c
            dist = np.sqrt((cx - x)**2 + (cy - y)**2)
            if dist < min_dist:
                min_dist = dist
                # Image y grows downwards, so an angle in (0, pi) means
                # the opponent is above, i.e. ahead of, the agent
                min_angle = np.arctan2(y - cy, cx - x)

        # Crash: nearest opponent within 18 pixels and inside the forward
        # cone between 0.1*pi and 0.9*pi
        return min_dist < 18. and 0.1 * np.pi < min_angle < 0.9 * np.pi

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BRAKE]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should be executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """

        if action == Action.ACCELERATE:
            self._relative_speed = min(self._relative_speed + 1,
                                       self._speed_range)
        elif action == Action.BRAKE:
            self._relative_speed = max(self._relative_speed - 1,
                                       -self._speed_range)

        return self._controller.move(action)

    def initialise(self, road, cars, speed, grid):
        """ Called at the beginning of each episode, mainly used
        for state initialisation. For more information on the arguments
        have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size
                  of the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect to the others
            grid:  2-dimensional numpy array containing the latest grid
                   representation of the environment

        Returns:
            None
        """
        raise NotImplementedError

    def act(self):
        """ Called at each loop iteration to choose and execute an action.

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, road, cars, speed, grid):
        """ Called at each loop iteration to construct the new state from
        the updated environment grid. For more information on the arguments
        have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size
                  of the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect to the others
            grid: 2-dimensional numpy array containing the latest grid
                  representation of the environment
        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """

        raise NotImplementedError
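
Compared to the previous example, this variant widens initialise() and sense() to also receive the road points, the car dictionary, and a relative speed that move() keeps within [-50, 50] and that the run loop resets to the minimum after a collision. A hedged sketch of a subclass against this richer interface (SpeedAwareAgent is illustrative only):

import random

class SpeedAwareAgent(Agent):
    def initialise(self, road, cars, speed, grid):
        self.speed = speed

    def act(self):
        # Accelerate while slower than the traffic, otherwise wander sideways
        if self.speed < 0:
            self.move(Action.ACCELERATE)
        else:
            self.move(random.choice([Action.LEFT, Action.RIGHT]))

    def sense(self, road, cars, speed, grid):
        self.speed = speed

    def learn(self):
        pass

    def callback(self, learn, episode, iteration):
        pass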