def train(max_episodes=10**6):
    """Train a PPO model on the environment named in ``params``.

    Builds the gym environment, the PPO model (optionally warm-started from
    a checkpoint), and a ``Logger``; then runs ``episode`` repeatedly,
    logging every episode score, printing and checkpointing a running
    average every ``params['print_interval']`` episodes, and calling the
    model's ``train_net`` after each episode.

    Args:
        max_episodes: Number of episodes to run. Defaults to one million,
            the value that used to be hard-coded.

    Reads the module-level ``params`` mapping (keys used here: ``env_name``,
    ``device``, ``transfer``, ``print_interval``). When
    ``params['transfer']`` is truthy, ``sys.argv[1]`` must be the path of a
    saved state dict.
    """
    # Called ``context`` rather than ``train`` so the local dict does not
    # shadow this function's own name. ``episode`` receives it positionally
    # and the 'env' / 'model' keys are unchanged.
    context = {}
    context['env'] = gym.make(params['env_name'])
    try:
        context['env'].init(params)
        context['model'] = PPO(
            params, context['env'].observation_space.shape[0]
        ).to(params['device'])
        if params['transfer']:
            # NOTE(review): torch.load unpickles the file, and the path comes
            # straight from the command line -- only pass trusted checkpoints.
            context['model'].load_state_dict(torch.load(sys.argv[1]))
        logger = Logger()

        score = 0.0  # sum of episode scores since the last report
        for n_epi in range(max_episodes):
            ep_score = episode(context, n_epi)
            logger.episode_score(ep_score, n_epi)
            score += ep_score

            # Report on every positive multiple of the print interval.
            # NOTE(review): the first window sums episodes 0..print_interval
            # inclusive (one more than later windows) yet still divides by
            # print_interval; left unchanged to preserve checkpoint timing.
            if n_epi % params['print_interval'] == 0 and n_epi != 0:
                avg_score = score / params['print_interval']
                print(f"Episode #{n_epi:5d} | Avg Score : {avg_score:2.2f}")
                logger.save_model(
                    avg_score, context['model'].state_dict(), n_epi
                )
                score = 0.0

            context['model'].train_net()
    finally:
        # Close the environment created above even if training is interrupted
        # or fails. The original called close() on an undefined name ``env``.
        context['env'].close()