def play_single_game():
    """Play a single game using the latest model snapshot.

    Loads the model stored at ``path + "/data/checkpoint"``, starts a new
    game and repeatedly plays the available action with the highest
    predicted Q-value until no action is left. After every move the chosen
    action, the reward (the squared return value of ``game.do_action``) and
    the board are printed; once the game has ended the score and the
    largest tile are printed.

    Relies on the module-level names ``Game``, ``load_model``, ``path``,
    ``ACTION_NAMES`` and ``np``.
    """
    state_size = 16  # the flattened board is fed to the model as a (1, 16) row

    game = Game()
    model = load_model(path + "/data/checkpoint")

    game.new_game()
    state = np.reshape(game.state(), [1, state_size])

    while not game.game_over():
        # Predict one Q-value per action for the current board.
        act_values = model.predict(state)

        # Mask every action that is currently not available. -inf (instead
        # of a finite sentinel such as -100) guarantees that a masked action
        # can never beat an available one, however low the predicted
        # Q-values are.
        available = game.available_actions()
        for candidate in range(len(act_values[0])):
            if candidate not in available:
                act_values[0][candidate] = -np.inf

        # Pick the available action with the highest Q-value and play it.
        action = np.argmax(act_values[0])
        reward = game.do_action(action) ** 2

        state = np.reshape(game.state(), [1, state_size])
        done = len(game.available_actions()) == 0

        print("Action:", ACTION_NAMES[action])
        print("Reward:", reward)
        game.print_state()

        if done:
            break

    # Report the result after the loop so the summary is printed no matter
    # which of the two exit conditions ended the game.
    # NOTE(review): 2**max_value presumably converts a log2-encoded tile back
    # to its face value - confirm against Game.state().
    max_value = np.amax(state[0])
    print("Score:", game.score())
    print("Max Value: " + str(2**max_value))
    print("Game over.")
done = True else: done = False next_state = np.reshape(next_state, [1, agent.state_size]) agent.remember(state, action, reward, next_state, done) state = next_state if done: if (debug): print("no action available") states = game.state() states = np.reshape(state, [1, agent.state_size]) max_value = np.amax(states[0]) output_list.append([ e, np.asscalar(max_value), np.asscalar(game.score()), agent.epsilon ]) if (debug): print("max_value: " + str(max_value)) break print("episodes: " + str(e)) #save copy of configuration and the episode_maxvalue_data if save_maxvalues: if e == 100: output_list.insert( 0, "gamma: " + str(parameters.gamma) + " | epsilon decay: " + str(parameters.epsilon_decay) + " | learning rate: " + str(parameters.learning_rate) + "\n batch size: " + str(parameters.batch_size) + " | reward = maxVal: " + str(parameters.is_max_value_reward) +