def check_environment_and_call(self, *args, **kwargs):
            """Run the wrapped reward shaping method only on a supported environment.

            Reads the ``environment`` keyword argument and checks that it is one
            of ``self.ENVIRONMENTS`` before delegating to the wrapped function.

            :raises Exception: if the environment is missing from ``kwargs`` or is
                not contained in ``self.ENVIRONMENTS``
            :return: whatever the wrapped function returns
            """
            environment = ArgumentExtractor.extract_argument(kwargs, "environment", None)

            if environment not in self.ENVIRONMENTS:
                # ``environment`` defaults to None when the caller did not pass one, so
                # it may not have a ``.value``; fall back to the object itself instead
                # of masking the real problem with an AttributeError.
                environment_name = getattr(environment, "value", environment)
                raise Exception("Reward shaping method does not match environment "
                                "(method:" + func.__name__ + ", environment:" + str(environment_name) + ")")

            return func(self, *args, **kwargs)
예제 #2
0
        def initialize_reward_shaper_and_call(self, *args, **kwargs):
            """Cache the current step on ``self`` and then invoke the wrapped function.

            Stores ``screen``, ``reward``, ``done`` and ``info`` from ``kwargs``,
            splits the screen into the chicken's pixels and the pixels of ten
            cars via ``extract_pixels_optimized``, wraps each group in a
            ``VisualComponent`` and records the remaining lives from
            ``info["ale.lives"]``. The episode-reward statistics are removed from
            ``kwargs`` before delegating.

            :return: whatever the wrapped function returns
            """
            read_kwarg = ArgumentExtractor.extract_argument
            self.screen = read_kwarg(kwargs, "screen", None)
            self.reward = read_kwarg(kwargs, "reward", None)
            self.done = read_kwarg(kwargs, "done", None)
            self.info = read_kwarg(kwargs, "info", None)

            (self.player_chicken_pixels,
             self.car_1_pixels,
             self.car_2_pixels,
             self.car_3_pixels,
             self.car_4_pixels,
             self.car_5_pixels,
             self.car_6_pixels,
             self.car_7_pixels,
             self.car_8_pixels,
             self.car_9_pixels,
             self.car_10_pixels) = self.extract_pixels_optimized(self.screen)

            self.player_chicken = VisualComponent(self.player_chicken_pixels, self.screen)

            # One visual component per car, in car-number order
            pixels_per_car = (
                self.car_1_pixels,
                self.car_2_pixels,
                self.car_3_pixels,
                self.car_4_pixels,
                self.car_5_pixels,
                self.car_6_pixels,
                self.car_7_pixels,
                self.car_8_pixels,
                self.car_9_pixels,
                self.car_10_pixels,
            )
            self.cars = [VisualComponent(car_pixels, self.screen) for car_pixels in pixels_per_car]

            self.lives = self.info["ale.lives"]

            # The episode statistics are not forwarded to the wrapped function
            for statistic in ("current_episode_reward", "max_episode_reward", "min_episode_reward"):
                kwargs.pop(statistic, None)

            return func(self, *args, **kwargs)
        def initialize_reward_shaper_and_call(self, *args, **kwargs):
            """Cache the current step on ``self`` and then invoke the wrapped function.

            Stores ``screen``, ``reward``, ``done`` and ``info`` from ``kwargs``,
            keeps the spaceship, rock and ray pixels returned by
            ``SpaceInvadersRewardShaper.extract_pixels``, wraps the spaceship and
            the rays in ``VisualComponent`` objects and records the remaining
            lives from ``info["ale.lives"]``.

            :return: whatever the wrapped function returns
            """
            read_kwarg = ArgumentExtractor.extract_argument
            self.screen = read_kwarg(kwargs, "screen", None)
            self.reward = read_kwarg(kwargs, "reward", None)
            self.done = read_kwarg(kwargs, "done", None)
            self.info = read_kwarg(kwargs, "info", None)

            # The fourth returned group (alien pixels) is received but not stored
            (self.spaceship_pixels,
             self.rocks_pixels,
             self.rays_pixels,
             _aliens_pixels) = SpaceInvadersRewardShaper.extract_pixels(self, self.screen)

            self.spaceship = VisualComponent(self.spaceship_pixels, self.screen)
            self.rays = VisualComponent(self.rays_pixels, self.screen)
            self.lives = self.info["ale.lives"]

            return func(self, *args, **kwargs)
    def reward_ball_hitting_upper_block(self, **kwargs):
        """
        Gives a share of the additional reward when a row of blocks lost a block,
        with rows checked from blue (1/6) through red (full reward)
        :return: shaped reward
        """
        screen = ArgumentExtractor.extract_argument(kwargs, "screen", None)
        original_reward = ArgumentExtractor.extract_argument(kwargs, "reward", None)
        additional_reward = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

        # Nothing to shape unless the environment itself paid out a reward
        if not original_reward > 0:
            return 0

        # Count the blocks of every row currently on screen
        red_count = self.count_blocks(screen, self.RED_BLOCKS_Y_MIN)
        orange_count = self.count_blocks(screen, self.ORANGE_BLOCKS_Y_MIN)
        yellow_count = self.count_blocks(screen, self.YELLOW_BLOCKS_Y_MIN)
        lime_count = self.count_blocks(screen, self.LIME_BLOCKS_Y_MIN)
        green_count = self.count_blocks(screen, self.GREEN_BLOCKS_Y_MIN)
        blue_count = self.count_blocks(screen, self.BLUE_BLOCKS_Y_MIN)

        # (class-level counter, current count, sixths of the reward), in check order
        rows = (
            ("BLUE_BLOCKS_ON_SCREEN", blue_count, 1),
            ("GREEN_BLOCKS_ON_SCREEN", green_count, 2),
            ("LIME_BLOCKS_ON_SCREEN", lime_count, 3),
            ("YELLOW_BLOCKS_ON_SCREEN", yellow_count, 4),
            ("ORANGE_BLOCKS_ON_SCREEN", orange_count, 5),
            ("RED_BLOCKS_ON_SCREEN", red_count, 6),
        )

        for counter_name, count, sixths in rows:
            if count < getattr(BreakoutRewardShaper, counter_name):
                # Remember the new count so the same block is not rewarded twice
                setattr(BreakoutRewardShaper, counter_name, count)
                if sixths == 6:
                    # The last row pays the additional reward unscaled
                    return additional_reward
                return additional_reward * (sixths / 6)

        return 0
예제 #5
0
        def initialize_reward_shaper_and_call(self, *args, **kwargs):
            """Cache the current step on ``self`` and then invoke the wrapped function.

            Stores ``screen``, ``reward``, ``done`` and ``info`` from ``kwargs``,
            splits the screen into ball, player-racket and opponent-racket pixels
            via ``extract_pixels``, wraps each group in a ``VisualComponent`` and
            records the remaining lives from ``info["ale.lives"]``.

            :return: whatever the wrapped function returns
            """
            read_kwarg = ArgumentExtractor.extract_argument
            self.screen = read_kwarg(kwargs, "screen", None)
            self.reward = read_kwarg(kwargs, "reward", None)
            self.done = read_kwarg(kwargs, "done", None)
            self.info = read_kwarg(kwargs, "info", None)

            (self.ball_pixels,
             self.player_racket_pixels,
             self.opponent_racket_pixels) = self.extract_pixels(self.screen)

            self.ball = VisualComponent(self.ball_pixels, self.screen)
            self.player_racket = VisualComponent(self.player_racket_pixels, self.screen)
            self.opponent_racket = VisualComponent(self.opponent_racket_pixels, self.screen)
            self.lives = self.info["ale.lives"]

            return func(self, *args, **kwargs)
예제 #6
0
    def reward_opponent_racket_covers_ball(self, **kwargs):
        """
        Gives an additional reward if the opponent's racket covers y-coordinate of the ball
        :return: shaped reward
        """

        additional_reward = ArgumentExtractor.extract_argument(
            kwargs, "additional_reward", 0)

        # Both components must be on screen; the ball's center then has to lie
        # between the top and bottom edge of the opponent's racket
        if self.ball.visible and self.opponent_racket.visible \
                and self.opponent_racket.top[1] <= self.ball.center[1] <= self.opponent_racket.bottom[1]:
            return additional_reward
        else:
            return 0
    def reward_player_racket_covers_ball(self, **kwargs):
        """
        Gives an additional reward if the x-coordinate of the ball's center lies
        between the left and right edge of the player's racket
        :return: shaped reward
        """
        bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

        # Without both components on screen there is nothing to compare
        if not (self.ball.visible and self.player_racket.visible):
            return 0

        if self.player_racket.left[0] <= self.ball.center[0] <= self.player_racket.right[0]:
            return bonus
        return 0
    def reward(self, **kwargs):
        """
        Gives an additional reward relative to recent episode rewards

        The shaped reward is ``1 + (current - max) / (max - min)``, which is the
        position of the current episode reward within the [min, max] range of
        episode rewards. It is only given on steps with a non-zero original
        reward and when the range is known and non-empty.
        :return: shaped reward
        """

        original_reward = ArgumentExtractor.extract_argument(
            kwargs, "reward", 0)
        current_episode_reward = ArgumentExtractor.extract_argument(
            kwargs, "current_episode_reward", 0)
        max_episode_reward = ArgumentExtractor.extract_argument(
            kwargs, "max_episode_reward", 0)
        min_episode_reward = ArgumentExtractor.extract_argument(
            kwargs, "min_episode_reward", 0)

        # Identity checks against None (PEP 8); the inequality of min and max
        # also guards the division below against a zero denominator
        if original_reward != 0 \
                and max_episode_reward is not None \
                and min_episode_reward is not None \
                and min_episode_reward != max_episode_reward:
            return 1 + ((current_episode_reward - max_episode_reward) /
                        (max_episode_reward - min_episode_reward))
        else:
            return 0
        def initialize_reward_shaper_and_call(self, *args, **kwargs):
            """Cache the current step on ``self`` and then invoke the wrapped function.

            Stores ``screen``, ``reward``, ``done`` and ``info`` from ``kwargs``,
            splits the screen into the pixel groups of Ms Pacman, the food and the
            four ghosts via ``extract_pixels`` and records the remaining lives from
            ``info["ale.lives"]``. Each group is wrapped in a ``VisualComponent``
            only when it contains pixels; otherwise the attribute is ``None``.

            :return: whatever the wrapped function returns
            """
            self.screen = ArgumentExtractor.extract_argument(kwargs, "screen", None)
            self.reward = ArgumentExtractor.extract_argument(kwargs, "reward", None)
            self.done = ArgumentExtractor.extract_argument(kwargs, "done", None)
            self.info = ArgumentExtractor.extract_argument(kwargs, "info", None)

            (self.ms_pacman_pixels,
             self.food_pixels,
             self.blinky_pixels,
             self.pinky_pixels,
             self.inky_pixels,
             self.clyde_pixels) = self.extract_pixels(self.screen)

            def component_or_none(pixels):
                # A component is only built for a pixel group that is present on screen
                return VisualComponent(pixels, self.screen) if len(pixels) > 0 else None

            self.ms_pacman = component_or_none(self.ms_pacman_pixels)
            # The food component is built from the food pixels (it used to be built
            # from Ms Pacman's pixels) and is guarded like every other component
            self.food = component_or_none(self.food_pixels)
            self.blinky = component_or_none(self.blinky_pixels)
            self.pinky = component_or_none(self.pinky_pixels)
            self.inky = component_or_none(self.inky_pixels)
            self.clyde = component_or_none(self.clyde_pixels)
            self.lives = self.info["ale.lives"]

            return func(self, *args, **kwargs)
예제 #10
0
    def reward_player_racket_hits_ball(self, **kwargs):
        """
        Gives an additional reward if the ball's center is at the x-position at which
        the player plays it and vertically within the player's racket
        :return: shaped reward
        """
        bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

        # Both components have to be on screen
        if not (self.ball.visible and self.player_racket.visible):
            return 0

        # The ball has to be exactly at the x-position at which the player plays it
        if not self.ball.center[0] == self.BALL_CENTER_X_WHEN_PLAYED_BY_PLAYER:
            return 0

        # ... and between the top and bottom edge of the player's racket
        if self.player_racket.top[1] <= self.ball.center[1] <= self.player_racket.bottom[1]:
            return bonus
        return 0
예제 #11
0
    def init(self, *args, **kwargs):
        """
        Caches the current step on the shaper: screen, reward, done flag and info,
        the pixel groups of the chicken and of ten cars, their visual components
        and the remaining lives taken from info["ale.lives"]
        """
        read_kwarg = ArgumentExtractor.extract_argument
        self.screen = read_kwarg(kwargs, "screen", None)
        self.reward = read_kwarg(kwargs, "reward", None)
        self.done = read_kwarg(kwargs, "done", None)
        self.info = read_kwarg(kwargs, "info", None)

        (self.player_chicken_pixels,
         self.car_1_pixels,
         self.car_2_pixels,
         self.car_3_pixels,
         self.car_4_pixels,
         self.car_5_pixels,
         self.car_6_pixels,
         self.car_7_pixels,
         self.car_8_pixels,
         self.car_9_pixels,
         self.car_10_pixels) = self.extract_pixels_optimized(self.screen)

        self.player_chicken = VisualComponent(self.player_chicken_pixels, self.screen)

        # One visual component per car, in car-number order
        pixels_per_car = (
            self.car_1_pixels,
            self.car_2_pixels,
            self.car_3_pixels,
            self.car_4_pixels,
            self.car_5_pixels,
            self.car_6_pixels,
            self.car_7_pixels,
            self.car_8_pixels,
            self.car_9_pixels,
            self.car_10_pixels,
        )
        self.cars = [VisualComponent(car_pixels, self.screen) for car_pixels in pixels_per_car]

        self.lives = self.info["ale.lives"]
    def reward_player_racket_close_to_ball_quadratic(self, **kwargs):
        """
        Gives an additional reward that shrinks with the horizontal distance between
        the centers of the ball and the player's racket; the square root of the reward
        is interpolated linearly over the distance and squared afterwards
        :return: shaped reward
        """
        bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

        # Interpolation end points, expressed for the square root of the reward
        root_at_nearest = math.sqrt(bonus)
        root_at_farthest = 0
        farthest = 160
        nearest = 0

        if not (self.ball.visible and self.player_racket.visible):
            return 0

        gap = abs(self.ball.center[0] - self.player_racket.center[0])
        slope = (root_at_nearest - root_at_farthest) / (nearest - farthest)
        root_reward = round(slope * gap + root_at_nearest, 4)
        return math.pow(root_reward, 2)
예제 #13
0
    def reward_opponent_racket_close_to_ball_linear(self, **kwargs):
        """
        Gives an additional reward that shrinks linearly with the vertical distance
        between the centers of the ball and the opponent's racket
        :return: shaped reward
        """
        bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

        # Interpolation end points: full reward at distance 0, none at distance 160
        reward_at_nearest = bonus
        reward_at_farthest = 0
        farthest = 160
        nearest = 0

        if not (self.ball.visible and self.opponent_racket.visible):
            return 0

        gap = abs(self.ball.center[1] - self.opponent_racket.center[1])
        slope = (reward_at_nearest - reward_at_farthest) / (nearest - farthest)
        return round(slope * gap + reward_at_nearest, 4)
예제 #14
0
    def reward_distance_walked(self, **kwargs):
        """
        Gives an additional reward proportional to the distance the chicken has walked,
        scaled so that the span CHICKEN_Y_MAX - CHICKEN_Y_MIN yields the full reward
        :return: shaped reward
        """
        bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

        # Interpolation end points: no reward at distance 0, full reward at the full span
        highest_reward = bonus
        lowest_reward = 0
        longest_walk = self.CHICKEN_Y_MAX - self.CHICKEN_Y_MIN
        shortest_walk = 0

        if not self.player_chicken.visible:
            return 0

        walked = FreewayRewardShaper.get_distance_walked(self)
        reward_per_unit = (highest_reward - lowest_reward) / (longest_walk - shortest_walk)
        return reward_per_unit * walked
    def reward_ms_pacman_far_from_enemy(self, **kwargs):
        """
        Gives an additional reward if Ms Pacman is far from next enemy

        The reward grows linearly with the distance to the nearest enemy for which
        a distance is available: 0 at distance 0, the full additional reward at the
        reference distance sqrt(2 * 150^2).
        :return: shaped reward, or 0 if no distance to any enemy is available
        """

        additional_reward = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

        enemies = (self.blinky, self.pinky, self.inky, self.clyde)
        candidate_distances = (MsPacmanRewardShaper.distance(self.ms_pacman, enemy) for enemy in enemies)
        # Enemies without a distance (reported as None) are ignored
        distances = [distance for distance in candidate_distances if distance is not None]

        if not distances:
            return 0

        distance_min = min(distances)

        reward_max = additional_reward
        reward_min = 0

        dist_max = math.sqrt(2 * math.pow(150, 2))
        dist_min = 0

        # Positive slope: the farther the nearest enemy, the higher the reward.
        # (The previous formula used a negative slope and thereby rewarded being
        # close to an enemy, contradicting the purpose of this method.)
        slope = (reward_max - reward_min) / (dist_max - dist_min)
        return round(slope * (distance_min - dist_min) + reward_min, 4)
예제 #16
0
    def reward_distance_to_car(self, **kwargs):
        """
        Gives an additional reward that grows linearly with the chicken's distance to
        a car, reaching the full reward at a distance of one screen width
        :return: shaped reward
        """
        bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

        # Interpolation end points: no reward at distance 0, full reward at screen width
        highest_reward = bonus
        lowest_reward = 0
        farthest = self.screen.shape[1]  # Screen width
        nearest = 0

        if not self.player_chicken.visible:
            return 0

        distance_to_car = FreewayRewardShaper.get_distance_to_car(self)
        slope = (highest_reward - lowest_reward) / (farthest - nearest)
        intercept = lowest_reward - (slope * nearest)
        return slope * distance_to_car + intercept
    def reward_player_avoids_line_of_fire(self, **kwargs):
        """
        Gives an additional reward if no ray shares an x value with the player's
        spaceship, or if a rock shares an x value with it
        :return: shaped reward
        """
        bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

        # Spaceship and rays both have to be on screen
        if not (self.spaceship.visible and self.rays.visible):
            return 0

        ship_x_values = self.get_x_values(self.spaceship_pixels)
        rock_x_values = self.get_x_values(self.rocks_pixels)
        ray_x_values = self.get_x_values(self.rays_pixels)

        aligned_with_rays = any(x in ship_x_values for x in ray_x_values)
        aligned_with_rocks = any(x in ship_x_values for x in rock_x_values)

        # No reward only when a ray is aligned with the ship and no rock is
        if aligned_with_rays and not aligned_with_rocks:
            return 0
        return bonus