from itertools import count
import time

# Run greedy (no-exploration) evaluation episodes of a trained Snake policy
# network, rendering every step until the episode terminates.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# map_location ensures a checkpoint saved on GPU still loads on a CPU-only host.
checkpoint = torch.load('policy_net_with_tail.pth', map_location=device)

policy_net = convnet(config.BOARD_SIZE).float().to(device)
policy_net.load_state_dict(checkpoint['model_state_dict'])
policy_net.eval()  # inference mode: freeze dropout / batch-norm statistics

env = Snake(config.BOARD_SIZE)

while True:
    done = False
    obs = env.reset()
    cum_reward = 0
    env.render()
    for step in count(1):
        # epsilon=0 and explore=False -> always take the greedy action;
        # no_grad avoids building an autograd graph during inference.
        with torch.no_grad():
            action = select_action(obs, policy_net, 0, explore=False)
        new_obs, reward, done = env.step(action)
        cum_reward += reward
        obs = new_obs

        env.render()

        if done:
            cv2.destroyAllWindows()
            break
    print('Reward:', cum_reward)
# ---- Example #2 ----
    def train(self, n, m, pars):
        """
        (Game, int, int, dict) -> None

        Train the agent on the snake game, running each step as a
        sequence of frames.

        n: row tiles of the screen
        m: col tiles of the screen
        pars: parameters passed in for each processor
        """
        # initialize
        record = 0  # best score seen so far
        game = Snake(n, m, pars.get('n_food', None))
        agent = Agent(game, pars)

        # Stop after this many games; -1 means run forever.
        # Hoisted out of the loop: the value never changes between iterations.
        max_games = pars.get('num_games', DEFAULT_END_GAME_POINT)

        while True:
            # get old state
            state_old = game.get_state()

            # get move
            final_move = agent.get_action(state_old)

            # perform move and get new state
            reward, done, score = game.play_step(final_move, pars)
            state_new = game.get_state()

            # train short memory (single-step update on the latest transition)
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     done)

            # remember the transition for experience replay
            agent.remember(state_old, final_move, reward, state_new, done)

            # End training once the configured number of games is exceeded.
            # NOTE: the original called quit() (raising SystemExit and killing
            # the whole process) followed by an unreachable break; a plain
            # break exits the loop cleanly and lets the caller continue.
            if max_games != -1 and agent.n_games > max_games:
                break

            # when game is over
            if done:
                # reset game attributes
                # increase game generation
                # train the long memory
                game.reset()
                agent.n_games += 1
                agent.train_long_memory()

                # new highscore
                if score > record:
                    record = score
                    # save the best model_state
                    #agent.model.save()

                # takes away food depending on given probability, up until 1 food remains
                decrease_probability = pars.get('decrease_food_chance',
                                                DECREASE_FOOD_CHANCE)
                if (game.n_food > 1) and (random.random() <
                                          decrease_probability):
                    game.n_food -= 1

                # prints game information to console
                print('Game', agent.n_games, 'Score', score, 'Record:', record)

                # appends game information to txt file at specified path
                self.save_to_file(f"graphs/{pars.get('graph', 'test')}.txt",
                                  agent.n_games, score, record)