def train(cfg_name, resume):
    """Run the main training loop for the given config, periodically
    logging metrics and saving model checkpoints.

    Args:
        cfg_name: name of the config to load via ``load_cfg``.
        resume: passed through to ``init_model`` to restore a prior run
            (also yields the starting iteration ``start_step``).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'running on {device}')

    cfg = load_cfg(cfg_name)
    train_cfg = cfg['train']

    log = Logger(device=device)
    envs = make_vec_envs(**cfg['env'])
    model, start_step = init_model(cfg, envs, device, resume)

    runner = EnvRunner(
        rollout_size=train_cfg['rollout_size'],
        envs=envs,
        model=model,
        device=device,
    )
    optim = ParamOptim(**cfg['optimizer'], params=model.parameters())
    agent = Agent(model=model, optim=optim, **cfg['agent'])

    save_every = train_cfg['checkpoint_every']
    log_every = train_cfg['log_every']
    total_steps = train_cfg['steps']
    save_name = train_cfg['checkpoint_name']

    # Rollouts are consumed lazily from the runner, in lockstep with the
    # progress counter.
    for step, rollout in zip(trange(start_step, total_steps), runner):
        update_log = agent.update(rollout)
        if step % log_every == 0:
            log.output({**update_log, **runner.get_logs()}, step)
        # Skip the checkpoint at the very first iteration of this run.
        if step > start_step and step % save_every == 0:
            path = save_name.format(n_iter=step // save_every)
            torch.save(model.state_dict(), path)
def train(cfg_name, env_name):
    """Train an ActorCritic agent on the named environment, then run a
    final evaluation and record the mean/std reward.

    Args:
        cfg_name: name of the config to load via ``load_cfg``.
        env_name: ``'OT'`` selects Obstacle Tower; any other value is
            treated as an Atari game id (``NoFrameskip-v4`` is appended).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'running on {device}')

    cfg = load_cfg(cfg_name)
    train_cfg = cfg['train']
    log = Logger(device=device)

    # Environment selection: Obstacle Tower vs. a vectorized Atari env.
    num_env = train_cfg['num_env']
    if env_name == 'OT':
        envs = make_obstacle_tower(num_env)
    else:
        envs = make_vec_envs(env_name + 'NoFrameskip-v4', num_env)

    emb = cfg['embedding']
    model = ActorCritic(
        output_size=envs.action_space.n,
        device=device,
        emb_size=emb['size'],
    )
    model.train().to(device=device)

    runner = EnvRunner(
        rollout_size=train_cfg['rollout_size'],
        envs=envs,
        model=model,
        device=device,
        emb_stack=emb['history_size'],
    )
    optim = ParamOptim(**cfg['optimizer'], params=model.parameters())
    agent = Agent(model=model, optim=optim, **cfg['agent'])

    start_step = 0
    log_every = train_cfg['log_every']
    total_steps = train_cfg['steps']
    log.log.add_text('env', env_name)

    for step, rollout in zip(trange(start_step, total_steps), runner):
        # Fraction of training completed, used for LR / schedule annealing.
        progress = step / total_steps
        optim.update(progress)
        update_log = agent.update(rollout, progress)
        if step % log_every == 0:
            log.output({**update_log, **runner.get_logs()}, step)

    # Final evaluation: summarize episode rewards as "mean ± std".
    reward = eval_model(model, envs, emb['history_size'], emb['size'], device)
    reward_str = f'{reward.mean():.2f} ± {reward.std():.2f}'
    log.log.add_text('final', reward_str)
    log.log.close()
def train(cfg_name):
    """Run a DQN-style training loop driven by ``config/<cfg_name>.yaml``.

    Optionally resumes from the latest checkpoint (``cfg['train']['resume']``),
    anneals exploration epsilon, fills a replay buffer from the env runner,
    and periodically logs metrics and saves agent checkpoints.

    Args:
        cfg_name: basename of a YAML config under ``config/``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'running on {device}')

    with open(f'config/{cfg_name}.yaml') as f:
        # FIX: yaml.load without an explicit Loader is unsafe on untrusted
        # files, deprecated since PyYAML 5.1, and a TypeError in PyYAML >= 6.
        # safe_load is sufficient for a plain config file.
        cfg = yaml.safe_load(f)

    # Resume support: restart the iteration counter and reload weights.
    if cfg['train'].get('resume', False):
        n_start, fname = find_checkpoint(cfg)
    else:
        n_start, fname = 0, None

    eps = DecayingEpsilon(**cfg['exploration'], n_iter=n_start)
    log = Logger(eps=eps, device=device)
    envs = get_envs(cfg, device)
    buffer = get_buffer(cfg, envs, device)
    agent = get_agent(cfg, envs, device)
    if fname:
        agent.load(fname)
    agent.init_target()

    env_runner = EnvRunner(envs=envs, qf=agent.qf, device=device, eps=eps)
    cp_iter = cfg['train']['checkpoint_every']
    log_iter = cfg['train']['log_every']

    counter = trange(n_start, cfg['train']['steps'] + 1)
    for n_iter, env_step in zip(counter, env_runner):
        eps.update(n_iter)
        buffer.append(env_step)
        if env_step.get('ep_info') is not None:
            log.output(env_step['ep_info'], n_iter)

        # NOTE(review): a literal 0.0 learning rate would be skipped here
        # too — presumably step_lr returns None when the lr is unchanged;
        # confirm that 0.0 is not a valid scheduled value.
        lr = agent.optim.step_lr(n_iter)
        if lr:
            log.output({'lr': lr}, n_iter)

        # Only start learning once the buffer has warmed up.
        if len(buffer) > cfg['buffer']['warmup']:
            to_log = agent.update(buffer.sample())
            if n_iter % log_iter == 0:
                log.stats(n_iter)
                log.output(to_log, n_iter)

        # Skip the checkpoint at the very first iteration of this run.
        if n_iter > n_start and n_iter % cp_iter == 0:
            fname = cfg['train']['checkpoint_name'].format(n_iter=n_iter // cp_iter)
            agent.save(fname)