def play_single_game():
    """Play one game greedily with the latest model snapshot, printing each move.

    Loads the model stored at ``path + "/data/checkpoint"`` and, until
    ``game.game_over()`` is true, performs the available action with the
    highest predicted Q-value.  After every move the chosen action, the
    squared value returned by ``game.do_action`` and the board are printed.
    Once no actions remain, the score and ``2 ** max(state)`` are printed
    (board cells are presumably stored as base-2 exponents -- confirm
    against ``Game.state``).

    Returns:
        None.  All results are written to stdout.
    """
    state_size = 16
    game = Game()
    model = load_model(path + "/data/checkpoint")

    game.new_game()
    state = np.reshape(game.state(), [1, state_size])

    while not game.game_over():
        # Work on a float copy so the prediction buffer is left untouched and
        # can hold -inf.
        q_values = np.array(model.predict(state)[0], dtype=float)

        # Mask unavailable moves with -inf rather than a finite sentinel: a
        # finite value (the previous -100) can be outranked by a legitimately
        # lower Q-value, which would select an illegal action.
        available = game.available_actions()
        for candidate in range(len(q_values)):
            if candidate not in available:
                q_values[candidate] = -np.inf

        # Greedy choice among the remaining (legal) actions.
        action = np.argmax(q_values)
        reward = game.do_action(action) ** 2
        state = np.reshape(game.state(), [1, state_size])

        print("Action:", ACTION_NAMES[action])
        print("Reward:", reward)
        game.print_state()

        # No legal move left after this step: report the result and stop.
        if len(game.available_actions()) == 0:
            max_value = np.amax(state[0])
            print("Score:", game.score())
            print("Max Value: " + str(2**max_value))
            print("Game over.")
            break
def save(self, name): self.model.save(name) if __name__ == "__main__": game = Game() agent = DQNAgent() # agent.load("./save/file") done = False batch_size = agent.batch_size debug = False save_maxvalues = True output_list = [] for e in range(EPISODES): game.new_game() state = game.state() state = np.reshape(state, [1, agent.state_size]) while not game.game_over(): action = agent.act(state) reward = (game.do_action(action))**2 if (agent.is_max_value_reward): reward = 0 temp = game.state() temp_reshaped = np.reshape(temp, [1, agent.state_size]) temp_max_value = np.amax(temp_reshaped[0]) if temp_max_value > agent.max_value_reward_threshold: reward = agent.max_value_reward_amount next_state = game.state() actions_available = game.available_actions() if len(actions_available) == 0: