# Example no. 1
0
def train(max_episodes=10**6):
    """ Trains an RL model.

    Creates the environment named by ``params['env_name']``, builds a PPO model
    on ``params['device']``, optionally restores weights from the checkpoint
    path given as the first command-line argument, then runs episodes. After
    each episode the score is logged and ``train_net()`` is called on the
    model. Every ``params['print_interval']`` episodes the average score of the
    window is printed and the model weights are saved through the logger.

    The environment is always closed on exit, even if training raises.

    Args:
        max_episodes: Number of episodes to run. Defaults to one million,
            which is the previously hard-coded value.

    Raises:
        IndexError: If ``params['transfer']`` is truthy but no checkpoint path
            was passed on the command line.
    """
    # Named `context` (not `train`) so the local dict does not shadow this
    # function. It is handed to `episode`, which reads the 'env' and 'model'
    # entries -- presumably; confirm against `episode`'s definition.
    context = {}
    context['env'] = gym.make(params['env_name'])

    try:
        # `init` is not part of the standard gym API; assumed to be a
        # project-specific hook on the environment -- TODO confirm.
        context['env'].init(params)
        obs_dim = context['env'].observation_space.shape[0]
        context['model'] = PPO(params, obs_dim).to(params['device'])

        if params['transfer']:
            # map_location lets a checkpoint saved on another device (e.g. a
            # GPU) be restored onto the device this run actually uses.
            checkpoint = torch.load(sys.argv[1], map_location=params['device'])
            context['model'].load_state_dict(checkpoint)

        logger = Logger()
        print_interval = params['print_interval']

        score = 0.0
        # Count episodes per window explicitly: the first window spans
        # episodes 0..print_interval inclusive, i.e. print_interval + 1
        # episodes, so dividing by print_interval would overstate the average.
        window_episodes = 0

        for n_epi in range(max_episodes):
            ep_score = episode(context, n_epi)
            logger.episode_score(ep_score, n_epi)
            score += ep_score
            window_episodes += 1

            if n_epi % print_interval == 0 and n_epi != 0:
                avg_score = score / window_episodes
                print(f"Episode #{n_epi:5d} | Avg Score : {avg_score:2.2f}")
                logger.save_model(avg_score, context['model'].state_dict(), n_epi)

                score = 0.0
                window_episodes = 0

            context['model'].train_net()
    finally:
        # The original called `env.close()` on an undefined name; the
        # environment lives in the context dict.
        context['env'].close()