def check_environment_and_call(self, *args, **kwargs):
    """
    Checks if reward shaping is done on a matching environment

    The ``environment`` keyword argument must be one of ``self.ENVIRONMENTS``;
    only then is the call delegated to ``func`` (a name resolved from the
    enclosing scope, which is not part of this block).

    :param args: positional arguments passed on to ``func``
    :param kwargs: keyword arguments passed on to ``func``; ``environment`` is validated here
    :return: the result of ``func``
    :raises Exception: if the environment is missing or not supported
    """
    environment = ArgumentExtractor.extract_argument(kwargs, "environment", None)

    if environment not in self.ENVIRONMENTS:
        # A missing environment is None and has no ``.value``; fall back to the
        # object itself so the mismatch is reported instead of an AttributeError.
        environment_name = getattr(environment, "value", environment)
        raise Exception("Reward shaping method does not match environment "
                        "(method:" + func.__name__
                        + ", environment:" + str(environment_name) + ")")

    return func(self, *args, **kwargs)
def initialize_reward_shaper_and_call(self, *args, **kwargs):
    """
    Prepares the per-step state of the reward shaper and calls the wrapped function

    Stores ``screen``, ``reward``, ``done`` and ``info`` from the keyword
    arguments on the instance, extracts the pixel groups of the chicken and
    the ten cars from the screen, wraps each group in a ``VisualComponent``
    and reads ``info["ale.lives"]``. The episode reward statistics are dropped
    from ``kwargs`` before ``func`` (resolved from the enclosing scope, not
    part of this block) is called.

    :return: the result of ``func``
    """
    extract = ArgumentExtractor.extract_argument
    self.screen = extract(kwargs, "screen", None)
    self.reward = extract(kwargs, "reward", None)
    self.done = extract(kwargs, "done", None)
    self.info = extract(kwargs, "info", None)

    # Exactly eleven pixel groups are expected: the chicken followed by cars 1-10.
    (
        self.player_chicken_pixels,
        self.car_1_pixels,
        self.car_2_pixels,
        self.car_3_pixels,
        self.car_4_pixels,
        self.car_5_pixels,
        self.car_6_pixels,
        self.car_7_pixels,
        self.car_8_pixels,
        self.car_9_pixels,
        self.car_10_pixels,
    ) = self.extract_pixels_optimized(self.screen)

    self.player_chicken = VisualComponent(self.player_chicken_pixels, self.screen)

    car_pixel_groups = (
        self.car_1_pixels,
        self.car_2_pixels,
        self.car_3_pixels,
        self.car_4_pixels,
        self.car_5_pixels,
        self.car_6_pixels,
        self.car_7_pixels,
        self.car_8_pixels,
        self.car_9_pixels,
        self.car_10_pixels,
    )
    self.cars = [VisualComponent(group, self.screen) for group in car_pixel_groups]

    self.lives = self.info["ale.lives"]

    # These statistics are removed so they are not passed on to func.
    for episode_key in ("current_episode_reward",
                        "max_episode_reward",
                        "min_episode_reward"):
        kwargs.pop(episode_key, None)

    return func(self, *args, **kwargs)
def initialize_reward_shaper_and_call(self, *args, **kwargs):
    """
    Prepares the per-step state of the reward shaper and calls the wrapped function

    Stores ``screen``, ``reward``, ``done`` and ``info`` from the keyword
    arguments on the instance, extracts the spaceship, rocks and rays pixel
    groups via ``SpaceInvadersRewardShaper.extract_pixels``, wraps spaceship
    and rays in ``VisualComponent`` objects and reads ``info["ale.lives"]``.
    Afterwards ``func`` (resolved from the enclosing scope, not part of this
    block) is called with the unchanged arguments.

    :return: the result of ``func``
    """
    extract = ArgumentExtractor.extract_argument
    self.screen = extract(kwargs, "screen", None)
    self.reward = extract(kwargs, "reward", None)
    self.done = extract(kwargs, "done", None)
    self.info = extract(kwargs, "info", None)

    pixel_groups = SpaceInvadersRewardShaper.extract_pixels(self, self.screen)
    # The fourth group (aliens) is returned by the extractor but not kept.
    (
        self.spaceship_pixels,
        self.rocks_pixels,
        self.rays_pixels,
        _aliens_pixels,
    ) = pixel_groups

    self.spaceship = VisualComponent(self.spaceship_pixels, self.screen)
    self.rays = VisualComponent(self.rays_pixels, self.screen)

    self.lives = self.info["ale.lives"]

    return func(self, *args, **kwargs)
def reward_ball_hitting_upper_block(self, **kwargs):
    """
    Gives an additional reward when a row lost a block, scaled by the row

    The blocks of every colour row are counted and compared with the counts
    remembered on ``BreakoutRewardShaper``. Rows are checked from blue to red;
    the first row whose count dropped has its remembered count updated and
    decides the reward: 1/6 of ``additional_reward`` for blue, rising by 1/6
    per row up to the full ``additional_reward`` for red.

    :return: shaped reward, 0 if the original reward is not positive
        or no row lost a block
    """
    extract = ArgumentExtractor.extract_argument
    screen = extract(kwargs, "screen", None)
    original_reward = extract(kwargs, "reward", None)
    additional_reward = extract(kwargs, "additional_reward", 0)

    if not original_reward > 0:
        return 0

    # Count colored blocks on screen
    red_count = self.count_blocks(screen, self.RED_BLOCKS_Y_MIN)
    orange_count = self.count_blocks(screen, self.ORANGE_BLOCKS_Y_MIN)
    yellow_count = self.count_blocks(screen, self.YELLOW_BLOCKS_Y_MIN)
    lime_count = self.count_blocks(screen, self.LIME_BLOCKS_Y_MIN)
    green_count = self.count_blocks(screen, self.GREEN_BLOCKS_Y_MIN)
    blue_count = self.count_blocks(screen, self.BLUE_BLOCKS_Y_MIN)

    # (class attribute holding the remembered count, current count, sixths of reward)
    rows = (
        ("BLUE_BLOCKS_ON_SCREEN", blue_count, 1),
        ("GREEN_BLOCKS_ON_SCREEN", green_count, 2),
        ("LIME_BLOCKS_ON_SCREEN", lime_count, 3),
        ("YELLOW_BLOCKS_ON_SCREEN", yellow_count, 4),
        ("ORANGE_BLOCKS_ON_SCREEN", orange_count, 5),
        ("RED_BLOCKS_ON_SCREEN", red_count, 6),
    )

    # Give reward if number of blocks decreased
    for remembered_attr, current_count, sixths in rows:
        if current_count < getattr(BreakoutRewardShaper, remembered_attr):
            setattr(BreakoutRewardShaper, remembered_attr, current_count)
            if sixths == 6:
                # The top row pays the additional reward itself, unscaled.
                return additional_reward
            return additional_reward * (sixths / 6)

    return 0
def initialize_reward_shaper_and_call(self, *args, **kwargs):
    """
    Prepares the per-step state of the reward shaper and calls the wrapped function

    Stores ``screen``, ``reward``, ``done`` and ``info`` from the keyword
    arguments on the instance, extracts the pixel groups of the ball and both
    rackets from the screen, wraps each group in a ``VisualComponent`` and
    reads ``info["ale.lives"]``. Afterwards ``func`` (resolved from the
    enclosing scope, not part of this block) is called with the unchanged
    arguments.

    :return: the result of ``func``
    """
    extract = ArgumentExtractor.extract_argument
    self.screen = extract(kwargs, "screen", None)
    self.reward = extract(kwargs, "reward", None)
    self.done = extract(kwargs, "done", None)
    self.info = extract(kwargs, "info", None)

    (
        self.ball_pixels,
        self.player_racket_pixels,
        self.opponent_racket_pixels,
    ) = self.extract_pixels(self.screen)

    current_screen = self.screen
    self.ball = VisualComponent(self.ball_pixels, current_screen)
    self.player_racket = VisualComponent(self.player_racket_pixels, current_screen)
    self.opponent_racket = VisualComponent(self.opponent_racket_pixels, current_screen)

    self.lives = self.info["ale.lives"]

    return func(self, *args, **kwargs)
def reward_opponent_racket_covers_ball(self, **kwargs):
    """
    Gives an additional reward if the opponent's racket covers y-coordinate of the ball

    The reward is granted only while both the ball and the opponent's racket
    are visible and the y-coordinate of the ball's center lies between the
    y-coordinates of the racket's top and bottom (both inclusive).

    :param kwargs: ``additional_reward`` is the reward to grant (defaults to 0)
    :return: shaped reward
    """
    additional_reward = ArgumentExtractor.extract_argument(
        kwargs, "additional_reward", 0)

    if self.ball.visible and self.opponent_racket.visible \
            and self.opponent_racket.top[1] <= self.ball.center[1] <= self.opponent_racket.bottom[1]:
        return additional_reward
    else:
        return 0
def reward_player_racket_covers_ball(self, **kwargs):
    """
    Gives an additional reward if the player's racket covers x-coordinate of the ball

    Both the ball and the player's racket have to be visible, and the
    x-coordinate of the ball's center has to lie between the x-coordinates of
    the racket's left and right (both inclusive).

    :return: shaped reward
    """
    bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

    if not (self.ball.visible and self.player_racket.visible):
        return 0

    racket_spans_ball = (
        self.player_racket.left[0]
        <= self.ball.center[0]
        <= self.player_racket.right[0]
    )
    return bonus if racket_spans_ball else 0
def reward(self, **kwargs):
    """
    Gives an additional reward relative to recent episode rewards

    The position of ``current_episode_reward`` between ``min_episode_reward``
    and ``max_episode_reward`` is mapped linearly: the result is 0 when the
    current episode reward equals the minimum and 1 when it equals the
    maximum. Values outside that range extrapolate on the same line.

    :param kwargs: ``reward``, ``current_episode_reward``, ``max_episode_reward``
        and ``min_episode_reward``; each defaults to 0 when absent
    :return: shaped reward, 0 if the original reward is 0, a bound is None
        or both bounds are equal
    """
    original_reward = ArgumentExtractor.extract_argument(
        kwargs, "reward", 0)
    current_episode_reward = ArgumentExtractor.extract_argument(
        kwargs, "current_episode_reward", 0)
    max_episode_reward = ArgumentExtractor.extract_argument(
        kwargs, "max_episode_reward", 0)
    min_episode_reward = ArgumentExtractor.extract_argument(
        kwargs, "min_episode_reward", 0)

    # Equal bounds are excluded because they would make the divisor zero.
    if original_reward != 0 \
            and max_episode_reward is not None \
            and min_episode_reward is not None \
            and min_episode_reward != max_episode_reward:
        return 1 + ((current_episode_reward - max_episode_reward)
                    / (max_episode_reward - min_episode_reward))
    else:
        return 0
def initialize_reward_shaper_and_call(self, *args, **kwargs):
    """
    Prepares the per-step state of the reward shaper and calls the wrapped function

    Stores ``screen``, ``reward``, ``done`` and ``info`` from the keyword
    arguments on the instance and extracts the pixel groups of Ms Pacman, the
    food and the four ghosts from the screen. Each of ``ms_pacman``, ``food``,
    ``blinky``, ``pinky``, ``inky`` and ``clyde`` becomes a ``VisualComponent``
    built from its own pixel group, or ``None`` when that group is empty.
    Finally ``info["ale.lives"]`` is read and ``func`` (resolved from the
    enclosing scope, not part of this block) is called with the unchanged
    arguments.

    :return: the result of ``func``
    """
    self.screen = ArgumentExtractor.extract_argument(kwargs, "screen", None)
    self.reward = ArgumentExtractor.extract_argument(kwargs, "reward", None)
    self.done = ArgumentExtractor.extract_argument(kwargs, "done", None)
    self.info = ArgumentExtractor.extract_argument(kwargs, "info", None)

    self.ms_pacman_pixels, \
        self.food_pixels, \
        self.blinky_pixels, \
        self.pinky_pixels, \
        self.inky_pixels, \
        self.clyde_pixels = self.extract_pixels(self.screen)

    def component_or_none(pixels):
        # len() rather than truthiness: the container type of a pixel group
        # is not visible here and may not define an unambiguous truth value.
        if len(pixels) > 0:
            return VisualComponent(pixels, self.screen)
        return None

    self.ms_pacman = component_or_none(self.ms_pacman_pixels)
    # The food component is built from the food pixels, not from Ms Pacman's.
    self.food = component_or_none(self.food_pixels)
    self.blinky = component_or_none(self.blinky_pixels)
    self.pinky = component_or_none(self.pinky_pixels)
    self.inky = component_or_none(self.inky_pixels)
    self.clyde = component_or_none(self.clyde_pixels)

    self.lives = self.info["ale.lives"]

    return func(self, *args, **kwargs)
def reward_player_racket_hits_ball(self, **kwargs):
    """
    Gives an additional reward if the player's racket hits the ball

    A hit is assumed when both the ball and the player's racket are visible,
    the x-coordinate of the ball's center equals
    ``BALL_CENTER_X_WHEN_PLAYED_BY_PLAYER`` and the y-coordinate of the ball's
    center lies between the racket's top and bottom (both inclusive).

    :return: shaped reward
    """
    bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

    both_visible = self.ball.visible and self.player_racket.visible
    if not both_visible:
        return 0

    ball_in_hit_column = (
        self.ball.center[0] == self.BALL_CENTER_X_WHEN_PLAYED_BY_PLAYER
    )
    if not ball_in_hit_column:
        return 0

    racket_covers_ball = (
        self.player_racket.top[1]
        <= self.ball.center[1]
        <= self.player_racket.bottom[1]
    )
    return bonus if racket_covers_ball else 0
def init(self, *args, **kwargs):
    """
    Prepares the per-step state of the reward shaper

    Stores ``screen``, ``reward``, ``done`` and ``info`` from the keyword
    arguments on the instance, extracts the pixel groups of the chicken and
    the ten cars from the screen, wraps each group in a ``VisualComponent``
    and reads ``info["ale.lives"]``. Positional arguments are accepted but
    not used.
    """
    extract = ArgumentExtractor.extract_argument
    self.screen = extract(kwargs, "screen", None)
    self.reward = extract(kwargs, "reward", None)
    self.done = extract(kwargs, "done", None)
    self.info = extract(kwargs, "info", None)

    # Exactly eleven pixel groups are expected: the chicken followed by cars 1-10.
    (
        self.player_chicken_pixels,
        self.car_1_pixels,
        self.car_2_pixels,
        self.car_3_pixels,
        self.car_4_pixels,
        self.car_5_pixels,
        self.car_6_pixels,
        self.car_7_pixels,
        self.car_8_pixels,
        self.car_9_pixels,
        self.car_10_pixels,
    ) = self.extract_pixels_optimized(self.screen)

    self.player_chicken = VisualComponent(self.player_chicken_pixels, self.screen)

    self.cars = [
        VisualComponent(car_pixels, self.screen)
        for car_pixels in (
            self.car_1_pixels,
            self.car_2_pixels,
            self.car_3_pixels,
            self.car_4_pixels,
            self.car_5_pixels,
            self.car_6_pixels,
            self.car_7_pixels,
            self.car_8_pixels,
            self.car_9_pixels,
            self.car_10_pixels,
        )
    ]

    self.lives = self.info["ale.lives"]
def reward_player_racket_close_to_ball_quadratic(self, **kwargs):
    """
    Gives an additional reward that falls off quadratically with the
    horizontal distance between the player's racket and the ball

    A line is drawn from ``sqrt(additional_reward)`` at distance 0 down to 0
    at distance 160; its value at the actual distance between the x-coordinates
    of the two centers is rounded to 4 decimals and then squared.

    :return: shaped reward, 0 if the ball or the player's racket is not visible
    """
    bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

    # Work on the square root so that squaring the result restores the scale.
    root_ceiling = math.sqrt(bonus)
    root_floor = 0
    farthest = 160
    nearest = 0

    if not (self.ball.visible and self.player_racket.visible):
        return 0

    gap = abs(self.ball.center[0] - self.player_racket.center[0])
    slope = (root_ceiling - root_floor) / (nearest - farthest)
    root_reward = round(slope * gap + root_ceiling, 4)
    return math.pow(root_reward, 2)
def reward_opponent_racket_close_to_ball_linear(self, **kwargs):
    """
    Gives an additional reward that falls off linearly with the vertical
    distance between the opponent's racket and the ball

    The reward equals ``additional_reward`` at distance 0 and reaches 0 at
    distance 160, measured between the y-coordinates of the two centers.
    The result is rounded to 4 decimals.

    :return: shaped reward, 0 if the ball or the opponent's racket is not visible
    """
    bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

    reward_ceiling = bonus
    reward_floor = 0
    farthest = 160
    nearest = 0

    if not (self.ball.visible and self.opponent_racket.visible):
        return 0

    gap = abs(self.ball.center[1] - self.opponent_racket.center[1])
    slope = (reward_ceiling - reward_floor) / (nearest - farthest)
    return round(slope * gap + reward_ceiling, 4)
def reward_distance_walked(self, **kwargs):
    """
    Gives an additional reward proportional to the distance the chicken has walked

    The value returned by ``FreewayRewardShaper.get_distance_walked`` is scaled
    linearly so that a distance of ``CHICKEN_Y_MAX - CHICKEN_Y_MIN`` yields the
    full ``additional_reward``.

    :return: shaped reward, 0 if the chicken is not visible
    """
    bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

    reward_ceiling = bonus
    reward_floor = 0
    longest_walk = self.CHICKEN_Y_MAX - self.CHICKEN_Y_MIN
    shortest_walk = 0

    if not self.player_chicken.visible:
        return 0

    walked = FreewayRewardShaper.get_distance_walked(self)
    reward_per_unit = (reward_ceiling - reward_floor) / (longest_walk - shortest_walk)
    return reward_per_unit * walked
def reward_ms_pacman_far_from_enemy(self, **kwargs):
    """
    Gives an additional reward if Ms Pacman is far from next enemy

    The distance to the nearest of the four ghosts is mapped linearly: the
    reward is 0 at distance 0 and grows to ``additional_reward`` at a distance
    of ``sqrt(2 * 150^2)``. Distances reported as ``None`` by
    ``MsPacmanRewardShaper.distance`` are ignored. The result is rounded to
    4 decimals.

    :param kwargs: ``additional_reward`` is the maximum reward (defaults to 0)
    :return: shaped reward, 0 if no distance to any enemy is available
    """
    additional_reward = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

    enemies = (self.blinky, self.pinky, self.inky, self.clyde)
    candidate_distances = [
        MsPacmanRewardShaper.distance(self.ms_pacman, enemy) for enemy in enemies
    ]
    distances = [distance for distance in candidate_distances if distance is not None]

    if not distances:
        return 0

    distance_min = min(distances)

    reward_max = additional_reward
    reward_min = 0
    dist_max = math.sqrt(2 * math.pow(150, 2))
    dist_min = 0

    # Rising line: the farther away the nearest enemy is, the larger the reward.
    slope = (reward_max - reward_min) / (dist_max - dist_min)
    return round(slope * (distance_min - dist_min) + reward_min, 4)
def reward_distance_to_car(self, **kwargs):
    """
    Gives an additional reward if the chicken has a huge distance to a car
    that can hit on the lane it stands on

    The value returned by ``FreewayRewardShaper.get_distance_to_car`` is scaled
    linearly so that a distance equal to the screen width yields the full
    ``additional_reward``.

    :return: shaped reward, 0 if the chicken is not visible
    """
    bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

    reward_ceiling = bonus
    reward_floor = 0
    farthest = self.screen.shape[1]  # Screen width
    nearest = 0

    if not self.player_chicken.visible:
        return 0

    gap_to_car = FreewayRewardShaper.get_distance_to_car(self)
    # Line through (nearest, reward_floor) and (farthest, reward_ceiling).
    slope = (reward_ceiling - reward_floor) / (farthest - nearest)
    intercept = reward_floor - (slope * nearest)
    return slope * gap_to_car + intercept
def reward_player_avoids_line_of_fire(self, **kwargs):
    """
    Gives an additional reward if the player's spaceship avoids line of fire

    The spaceship counts as being in line with rays (or rocks) when it shares
    at least one x value with them. The reward is granted unless the spaceship
    is in line with rays without also being in line with rocks.

    :return: shaped reward, 0 if the spaceship or the rays are not visible
    """
    bonus = ArgumentExtractor.extract_argument(kwargs, "additional_reward", 0)

    if not (self.spaceship.visible and self.rays.visible):
        return 0

    ship_columns = self.get_x_values(self.spaceship_pixels)
    rock_columns = self.get_x_values(self.rocks_pixels)
    ray_columns = self.get_x_values(self.rays_pixels)

    in_line_with_rays = any(column in ship_columns for column in ray_columns)
    in_line_with_rocks = any(column in ship_columns for column in rock_columns)

    exposed_to_fire = in_line_with_rays and not in_line_with_rocks
    return 0 if exposed_to_fire else bonus