# Evaluation loop: load a trained policy network and watch it play Snake,
# rendering each frame and printing the cumulative reward per episode.
from itertools import count
import time

# Use the GPU when available; fall back to CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# map_location ensures a checkpoint saved on a GPU machine still loads on a
# CPU-only host (without it torch.load raises on deserializing CUDA tensors).
checkpoint = torch.load('policy_net_with_tail.pth', map_location=device)
policy_net = convnet(config.BOARD_SIZE).float().to(device, non_blocking=True)
policy_net.load_state_dict(checkpoint['model_state_dict'])
policy_net.eval()  # inference mode: fixes dropout/batch-norm behavior

env = Snake(config.BOARD_SIZE)

while True:
    done = False
    obs = env.reset()
    cum_reward = 0
    env.render()
    # Greedy rollout: epsilon=0 and explore=False -> always the policy's
    # best action, no exploration noise.
    for step in count(1):
        with torch.no_grad():  # no gradients needed during evaluation
            action = select_action(obs, policy_net, 0, explore=False)
        new_obs, reward, done = env.step(action)
        cum_reward += reward
        obs = new_obs
        env.render()
        if done:
            cv2.destroyAllWindows()
            break
    print('Reward:', cum_reward)
def train(self, n, m, pars):
    """
    (Game, int, int, dict) -> None

    Train an agent on a Snake game, running each step as a sequence of frames.

    n: number of row tiles of the screen
    m: number of col tiles of the screen
    pars: per-run parameters; keys used here are 'n_food', 'num_games',
          'decrease_food_chance', and 'graph'
    """
    record = 0  # best score seen so far
    game = Snake(n, m, pars.get('n_food', None))
    agent = Agent(game, pars)

    while True:
        # Observe, choose an action, apply it, observe the result.
        state_old = game.get_state()
        final_move = agent.get_action(state_old)
        reward, done, score = game.play_step(final_move, pars)
        state_new = game.get_state()

        # Learn from the single transition, then store it for replay.
        agent.train_short_memory(state_old, final_move, reward, state_new, done)
        agent.remember(state_old, final_move, reward, state_new, done)

        # Stop once the configured number of games is reached; -1 means run
        # forever.  (Fix: the original called quit(), which terminates the
        # whole interpreter from inside a method, followed by an unreachable
        # `break`; returning lets the caller keep control.)
        if pars.get('num_games', DEFAULT_END_GAME_POINT) != -1:
            if agent.n_games > pars.get('num_games', DEFAULT_END_GAME_POINT):
                return

        # When the game is over: reset attributes, advance the generation
        # counter, and train on the replay memory.
        if done:
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            # New highscore.
            if score > record:
                record = score
                # save the best model_state
                # agent.model.save()

            # With the configured probability, take away one food item,
            # never dropping below a single remaining food.
            decrease_probability = pars.get('decrease_food_chance', DECREASE_FOOD_CHANCE)
            if (game.n_food > 1) and (random.random() < decrease_probability):
                game.n_food -= 1

            # Print game information to the console.
            print('Game', agent.n_games, 'Score', score, 'Record:', record)

            # Append game information to a txt file at the specified path.
            self.save_to_file(f"graphs/{pars.get('graph', 'test')}.txt",
                              agent.n_games, score, record)