# PacMan environment wrapper. Requires numpy plus the project's Game and
# Direction classes.
import numpy as np


class PacMan:
    def __init__(self, num_frames, radius, level):
        self.game = Game(level, radius)
        self.direction_dict = dict(enumerate(Direction))
        self.num_channels, height, width = self.game.array.shape
        self.state_shape = (self.num_channels * num_frames, height, width)
        self.num_frames = num_frames
        self.num_actions = len(Direction)
        self.frames = []
        self.radius = radius

    @property
    def score(self):
        return self.game.score

    @property
    def won(self):
        return self.game.state is Game.State.WON

    def frame(self):
        # Crop a (2 * radius + 1)-sided window of the board centered on Pac-Man.
        y, x = self.game.pacman
        frame = self.game.array
        frame = frame[:, y - self.radius:y + self.radius + 1]
        frame = frame[:, :, x - self.radius:x + self.radius + 1]
        return frame.astype(np.float32).tolist()

    def render(self):
        print(self.game)

    def reset(self):
        # Restart the game and pre-fill the frame stack with num_frames frames.
        self.game.reset(self.radius)
        self.frames = self.frame()
        for _ in range(self.num_frames - 1):
            self.game.step(self.direction_dict[0])
            self.frames += self.frame()
        return self.frames

    def reward(self, rewards):
        # Eating food short-circuits to +10.0; otherwise return the shaped sum
        # of step penalty, survival bonus, powerup, ghost, and loss terms.
        if rewards.food:
            return 10.0
        return (-2.25
                + 2.5 * (self.game.state is Game.State.ACTIVE)
                + 22.5 * rewards.powerup
                + 75.0 * rewards.ghost
                - 65.0 * (self.game.state is Game.State.LOST))

    def step(self, action):
        direction = self.direction_dict[action]
        rewards = self.game.step(direction)
        end = self.game.state in (Game.State.WON, Game.State.LOST)
        # Drop the oldest frame's channels and append the newest observation.
        self.frames[:self.num_channels] = []
        self.frames += self.frame()
        reward = self.reward(rewards)
        return end, self.frames, reward
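# A minimal usage sketch for the PacMan wrapper above, assuming only the
# reset()/step() API it defines; the random-action loop is illustrative,
# not part of the original code.
import random


def play_random_episode(env):
    frames = env.reset()
    total_reward = 0.0
    end = False
    while not end:
        # Sample a random action index; a trained agent would pick from frames.
        action = random.randrange(env.num_actions)
        end, frames, reward = env.step(action)
        total_reward += reward
    return total_reward, env.score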
def play_deep_q_model(level='level-0', model_path='./nn_model_level_0_2k_iter.h5'):
    dq_model = DeepQ(level)
    dq_model.model = load_model(model_path)

    def ai_func(current_game_state):
        return dq_model.pick_optimal_action(current_game_state)

    game = Game(level, init_screen=True, ai_function=ai_func)
    game.run()
def play_q_learning_model(level='level-0', model_path='./q_table.pkl'):
    q_model = QLearn()
    q_model.q_table = load_pickle(model_path)

    def ai_func(current_game_state):
        return q_model.pick_optimal_action(current_game_state, printing=False)

    game = Game(level, init_screen=True, ai_function=ai_func)
    game.run()
def run_with_game_loop(level='level-2', model_path='./nn_model4500.h5'):
    dq_model = DeepQ(level)
    dq_model.model = load_model(model_path)

    def ai_func(current_game_state):
        return dq_model.pick_optimal_action(current_game_state)

    game = Game(level, init_screen=True, ai_function=ai_func)
    game.run()
def run_with_game_loop(level='level-0', model_path='./q_table.pkl'):
    q_model = QLearn()
    q_model.q_table = load_pickle(model_path)

    def ai_func(current_game_state):
        return q_model.pick_optimal_action(current_game_state)

    game = Game(level, init_screen=True, ai_function=ai_func)
    game.run()
def train(self, level='level-0', num_episodes=10):
    game = Game(level)
    discount = 0.8
    alpha = 0.2
    for i in range(num_episodes):
        if i % 50 == 0:
            print("Iteration number", i)
        current_game_state = deepcopy(game.initial_game_state)
        episode_done = False
        while not episode_done:
            action = self.pick_action(current_game_state)
            new_game_state, action_event = get_next_game_state_from_action(
                current_game_state, action.name)
            if action_event == ActionEvent.WON or action_event == ActionEvent.LOST:
                episode_done = True
                if action_event == ActionEvent.WON:
                    print("Won!!")
            reward = calculate_reward_for_move(action_event)
            if current_game_state not in self.q_table:
                self.q_table[current_game_state] = {
                    key: 0.0 for key in Action.get_all_actions()}
            # Tabular Q-learning update:
            # Q(s, a) <- Q(s, a) + alpha * (r + discount * max_a' Q(s', a') - Q(s, a))
            self.q_table[current_game_state][action] += alpha * (
                reward
                + discount * self.compute_max_q_value(new_game_state)
                - self.q_table[current_game_state][action])
            current_game_state = new_game_state
    save_pickle('./q_table', self.q_table, True)
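# train() above relies on self.pick_action; a hypothetical epsilon-greedy
# version is sketched here, assuming q_table maps state -> {Action: value}
# as in the update step above. Not part of the original code.
import random


def pick_action(self, game_state, epsilon=0.1):
    # Unseen states get a zero-initialized action-value row.
    if game_state not in self.q_table:
        self.q_table[game_state] = {key: 0.0 for key in Action.get_all_actions()}
    # Explore with probability epsilon, otherwise exploit the best known action.
    if random.random() < epsilon:
        return random.choice(Action.get_all_actions())
    return max(self.q_table[game_state], key=self.q_table[game_state].get)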
def menu():
    run = True
    game = Game(WIN)
    game.update()
    while run:
        for e in event.get():
            if e.type == QUIT:
                run = False
            elif e.type == KEYDOWN:
                game.menu_active = False
                main(game)
                run = False
    quit()
def newGame(self, layout, pacmanAgent, ghostAgents, display,
            quiet=False, catchExceptions=False):
    agents = [pacmanAgent] + ghostAgents[:layout.getNumGhosts()]
    initState = GameState()
    initState.initialize(layout, len(ghostAgents))
    game = Game(agents, display, self, catchExceptions=catchExceptions)
    game.state = initState
    self.initialState = initState.deepCopy()
    self.quiet = quiet
    return game
def newGame(self, layout, agents, display, length, muteAgents, catchExceptions):
    initState = GameState()
    initState.initialize(layout, len(agents))
    starter = random.randint(0, 1)
    print('%s' % ['Red', 'Blue'][starter])
    game = Game(agents, display, self, startingIndex=starter,
                muteAgents=muteAgents, catchExceptions=catchExceptions)
    game.state = initState
    game.length = length
    game.state.data.timeleft = length
    if 'drawCenterLine' in dir(display):
        display.drawCenterLine()
    self._initBlueFood = initState.getBlueFood().count()
    self._initRedFood = initState.getRedFood().count()
    return game
def test_setup():
    game = Game('level-0', True)
    game.run()
from pacman.game import Game
from deepq.ai_example import get_suggested_move

if __name__ == '__main__':
    game = Game('level-2')
    game.run()
def train(self):
    # Init game
    game = Game('level-2')
    current_game_state = deepcopy(game.game_state)

    # Init memory
    memory = Memory(max_size=10)

    # TODO: Init DeepQNetwork
    model = DeepQ().model
    gamma = 0.95
    done = False
    count = 0

    # TODO: Pre-train to fill up memory
    while not done:
        pygame.event.get()
        # Placeholder policy until pick_action is wired in:
        # action = pick_action(current_game_state)
        action = Action.RIGHT
        if count > 8:
            action = Action.LEFT
        next_game_state, action_event = get_next_game_state_from_action(
            current_game_state, action.value)
        reward = calculate_reward_for_move(action_event)
        print(count, action.value, action_event)
        game.game_state = next_game_state
        game.animate()
        if action_event == ActionEvent.LOST:
            done = True
        experience = Experience(current_state=current_game_state,
                                action=action,
                                reward=reward,
                                next_state=next_game_state,
                                done=done)
        memory.add(experience)
        current_game_state = deepcopy(next_game_state)
        count += 1

    y_train = []  # Target Q-values
    batch = memory.get_mini_batch(batch_size=20)
    sample: Experience
    for sample in batch:
        y_target = model.predict(sample.current_state)  # TODO: wrap in list?
        if sample.done:
            # Terminal state: Q-target is the raw reward.
            y_target[0][sample.action] = sample.reward
        else:
            # Bellman target: r + gamma * max_a' Q(s', a')
            y_target[0][sample.action] = (
                sample.reward + gamma * np.max(model.predict(sample.next_state)))
        y_train.append(y_target)  # TODO
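# The train() method above builds y_train but never fits the network. A
# plausible closing step, assuming a Keras-style model and that the stored
# game states can be batched as arrays (hypothetical helper, not in the
# original code):
def fit_q_targets(model, batch, y_train):
    x_train = np.asarray([sample.current_state for sample in batch])
    # Each y_target has shape (1, num_actions); stack into (batch, num_actions).
    y = np.asarray(y_train).squeeze(axis=1)
    model.fit(x_train, y, epochs=1, verbose=0)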
from pacman.game import Game

if __name__ == '__main__':
    game = Game('level-2', True)
    game.run()