def __init__(self, force_game_fps=10, run_real_time=False):
    """Initializes the deep Q-network.

    Args:
        force_game_fps: Forwarded unchanged to the superclass initializer.
        run_real_time: Forwarded unchanged to the superclass initializer.
    """
    # Forward the caller's values; previously the literals 10 / False were
    # passed here, so both arguments were silently ignored.
    super(TetrisPlayer, self).__init__(force_game_fps=force_game_fps,
                                       run_real_time=run_real_time)
    # Reward accumulated since it was last reported.
    self.new_reward = 0.0
    # Becomes True once a game over has been detected.
    self.terminal = False
    self.lines_removed = False
    self.dql = DeepQLearner(ACTIONS, save=True)
def __init__(self, force_game_fps=10, run_real_time=False):
    """Set up score bookkeeping and the learner.

    Both arguments are handed straight to the superclass initializer; see
    that function for their meaning.
    """
    parent = super(PongPlayer, self)
    parent.__init__(force_game_fps=force_game_fps,
                    run_real_time=run_real_time)

    # Scores seen on the previous feedback call; both start at zero.
    self.last_bar1_score = self.last_bar2_score = 0.0

    self.dql = DeepQLearner(ACTIONS)
def __init__(self, force_game_fps=8, run_real_time=False):
    """Set up hit/miss bookkeeping and the learner.

    Both arguments are handed straight to the superclass initializer.
    """
    parent = super(HalfPongPlayer, self)
    parent.__init__(force_game_fps=force_game_fps,
                    run_real_time=run_real_time)

    # Counts seen on the previous feedback call.
    self.last_hit_count = self.last_miss_count = 0
    # Baseline counts; both start at zero.
    self.starting_hit_count = self.starting_miss_count = 0
    # No ratio is available until feedback has been computed.
    self.score_ratio = None

    self.dql = DeepQLearner(ACTIONS)
class PongPlayer(PyGamePlayer):
    """Simple implementation of PyGamePlayer for Pong."""

    def __init__(self, force_game_fps=10, run_real_time=False):
        """Set up score bookkeeping and the learner.

        Both arguments are handed straight to the superclass initializer;
        see that function for their meaning.
        """
        parent = super(PongPlayer, self)
        parent.__init__(force_game_fps=force_game_fps,
                        run_real_time=run_real_time)

        # Scores seen on the previous feedback call; both start at zero.
        self.last_bar1_score = self.last_bar2_score = 0.0

        self.dql = DeepQLearner(ACTIONS)

    def get_keys_pressed(self, screen_array, feedback, terminal):
        """Ask the learner which keys to press for this timestep.

        See parent class function.
        """
        keys = self.dql.step(screen_array, feedback, terminal)
        return keys

    def get_feedback(self):
        """Compute the feedback for the current state of the game.

        The reward is the change in bar1's score since the previous call
        minus the change in bar2's score over the same interval. See parent
        class function.

        Returns:
            A ``(reward, terminal)`` tuple: the net score change as a float,
            and a flag that is True whenever that change is non-zero.
        """
        # The import lives here because importing the module starts the game.
        from games.pong import bar1_score, bar2_score

        bar1_gain = bar1_score - self.last_bar1_score
        bar2_gain = bar2_score - self.last_bar2_score

        # Remember the current scores for the next call.
        self.last_bar1_score, self.last_bar2_score = bar1_score, bar2_score

        net_change = bar1_gain - bar2_gain
        return float(net_change), net_change != 0

    def start(self):
        """Starts the learner and game."""
        super(PongPlayer, self).start()
        # Importing the game module is what launches the game.
        import games.pong
class TetrisPlayer(PyGamePlayer):
    """Implementation of PyGamePlayer for Tetris."""

    def __init__(self, force_game_fps=10, run_real_time=False):
        """Initializes the deep Q-network and hooks into the Tetris game.

        Args:
            force_game_fps: Forwarded unchanged to the superclass initializer.
            run_real_time: Forwarded unchanged to the superclass initializer.
        """
        # Forward the caller's values; previously the literals 10 / False
        # were passed here, so both arguments were silently ignored.
        super(TetrisPlayer, self).__init__(force_game_fps=force_game_fps,
                                           run_real_time=run_real_time)
        # Reward accumulated since get_feedback last reported it.
        self.new_reward = 0.0
        # Becomes True once a game over has been detected.
        self.terminal = False
        self.lines_removed = False
        self.dql = DeepQLearner(ACTIONS, save=True)

        # The hooks are installed here with *bound* methods so that they
        # update this player's state. Registering the plain functions outside
        # of __init__ could never supply the player instance as `self`.
        # NOTE(review): every new TetrisPlayer wraps the game functions again;
        # confirm only one player is created per process.

        # To get the reward we will intercept the removeCompleteLines method
        # and store what it returns.
        games.tetris.removeCompleteLines = function_intercept(
            games.tetris.removeCompleteLines,
            self.add_removed_lines_to_reward)
        # Find out if we have had a game over.
        games.tetris.showTextScreen = function_intercept(
            games.tetris.showTextScreen,
            self.check_for_game_over)

    def add_removed_lines_to_reward(self, lines_removed, *args, **kwargs):
        """Adds the number of removed lines to the pending reward.

        Args:
            lines_removed: Value returned by the intercepted
                removeCompleteLines call.
            *args: Ignored. Presumably the intercepted call's own arguments,
                forwarded by function_intercept -- TODO confirm.
            **kwargs: Ignored, same as ``*args``.

        Returns:
            ``lines_removed`` unchanged.
        """
        self.new_reward += lines_removed
        return lines_removed

    def check_for_game_over(self, ret, text, *args, **kwargs):
        """Updates player state to determine if the game is over.

        Args:
            ret: Unused here; presumably the result of the intercepted
                showTextScreen call -- TODO confirm.
            text: Text passed to the text screen; 'Game Over' marks the end
                of a game.
            *args: Ignored.
            **kwargs: Ignored.
        """
        if text == 'Game Over':
            self.terminal = True

    def get_keys_pressed(self, screen_array, feedback, terminal):
        """Returns the keys to press at the given timestep.

        See parent class function.
        """
        if self.terminal:
            # A game over was seen: clear the flag and press space instead of
            # consulting the learner.
            self.terminal = False
            return [pgc.K_SPACE]
        return self.dql.step(screen_array, feedback, terminal)

    def get_feedback(self):
        """Returns the feedback for the current state of the game.

        See parent class function.

        Returns:
            A ``(reward, terminal)`` tuple. After a game over the reward is
            ``0.35 * blankSpaces``; otherwise it is the square of the reward
            accumulated since the previous call, which is then reset.
        """
        if self.terminal:
            from games.tetris import blankSpaces
            terminal = self.terminal
            # Found the following reward/penalty strategy in a paper.
            # Coeff is taken from the paper.
            # Should play around with it a little
            return float(.35 * blankSpaces), terminal

        temp = self.new_reward
        self.new_reward = 0.0
        self.lines_removed = False
        terminal = self.terminal
        return temp * temp, terminal

    def start(self):
        """Starts the player."""
        super(TetrisPlayer, self).start()
        games.tetris.main()
class HalfPongPlayer(PyGamePlayer):
    """Simple implementation of PyGamePlayer for Half Pong."""

    def __init__(self, force_game_fps=8, run_real_time=False):
        """Set up hit/miss bookkeeping and the learner.

        Args:
            force_game_fps: Forwarded unchanged to the superclass initializer.
            run_real_time: Forwarded unchanged to the superclass initializer.
        """
        super(HalfPongPlayer, self).__init__(force_game_fps=force_game_fps,
                                             run_real_time=run_real_time)
        # Counts seen on the previous get_feedback call.
        self.last_hit_count = 0
        self.last_miss_count = 0
        # Baseline counts for the current logging window.
        self.starting_hit_count = 0
        self.starting_miss_count = 0
        # Hits per miss within the current window; None until first feedback.
        self.score_ratio = None
        self.dql = DeepQLearner(ACTIONS)

    def get_keys_pressed(self, screen_array, reward, terminal):
        """Returns the keys to press at the given timestep.

        See parent class function. The current score ratio is passed to the
        learner along with the usual arguments.
        """
        return self.dql.step(screen_array, reward, terminal, self.score_ratio)

    def get_feedback(self):
        """Returns the feedback for the current state of the game.

        In this case, just returns the change in the learner's score. See
        parent class function.

        Returns:
            A ``(reward, terminal)`` tuple: the change in hits minus the
            change in misses since the previous call, as a float, and a flag
            that is True when that change equals -1.
        """
        # Import must be done here because otherwise importing would cause
        # the game to start playing.
        from games.half_pong import hit_count, miss_count

        # Get the difference in score between this and the last run.
        score_change = (hit_count - self.last_hit_count) - (
            miss_count - self.last_miss_count)
        self.last_miss_count = miss_count
        self.last_hit_count = hit_count

        # Start a new window whenever the miss count is a multiple of
        # LOG_FREQUENCY.
        if self.last_miss_count % LOG_FREQUENCY == 0:
            self.starting_miss_count = self.last_miss_count
            self.starting_hit_count = self.last_hit_count

        # Convert before dividing so the ratio is a true quotient even where
        # `/` on integers floors (Python 2). The +1 keeps the denominator
        # non-zero when no miss has happened in the window.
        window_hits = hit_count - self.starting_hit_count
        window_misses = miss_count - self.starting_miss_count + 1
        self.score_ratio = float(window_hits) / window_misses

        return float(score_change), score_change == -1

    def start(self):
        """Starts the learner and game."""
        super(HalfPongPlayer, self).start()
        # Importing the game module is what launches the game.
        import games.half_pong
class FlappyBirdPlayer(PyGamePlayer):
    """Implementation of PyGamePlayer for Flappy Bird."""

    def __init__(self, force_game_fps=10, run_real_time=True):
        """Initializes the deep Q-network.

        Args:
            force_game_fps: Forwarded unchanged to the superclass initializer.
            run_real_time: Forwarded unchanged to the superclass initializer.
        """
        super(FlappyBirdPlayer, self).__init__(force_game_fps=force_game_fps,
                                               run_real_time=run_real_time)
        self.dql = DeepQLearner(ACTIONS, save=True)

    def get_keys_pressed(self, screen_array, feedback, terminal):
        """Returns the keys to press at the given timestep.

        See parent class function. After a collision space is pressed;
        otherwise the learner chooses the keys.
        """
        # The branches were previously swapped: the learner was consulted
        # only after a collision and space was pressed on every other frame.
        # This now mirrors TetrisPlayer, which presses space on game over.
        # NOTE(review): presumably space restarts the game -- confirm.
        if game.collision:
            return [pgc.K_SPACE]
        return self.dql.step(screen_array, feedback, terminal)

    def get_feedback(self):
        """Returns the feedback for the current state of the game.

        See parent class function.

        Returns:
            A ``(reward, terminal)`` tuple: -500.0 on a collision and 1.0
            otherwise, together with the collision flag itself.
        """
        # Rewarded only on dead or alive basis.
        reward = -500.0 if game.collision else 1.0
        return reward, game.collision

    def start(self):
        """Starts the player."""
        super(FlappyBirdPlayer, self).start()
        game.main()
def __init__(self, force_game_fps=10, run_real_time=True):
    """Create the player and its deep Q-network.

    Both arguments are handed straight to the superclass initializer.
    """
    parent_options = {
        'force_game_fps': force_game_fps,
        'run_real_time': run_real_time,
    }
    super(FlappyBirdPlayer, self).__init__(**parent_options)

    learner = DeepQLearner(ACTIONS, save=True)
    self.dql = learner