Example #1
def train(cfg_name, resume):
    """Run the training loop described by the config named *cfg_name*.

    Builds envs, model, optimizer, and agent from the config, then
    iterates rollouts: logs every ``log_every`` steps and writes a
    checkpoint every ``checkpoint_every`` steps. When *resume* is set,
    ``init_model`` restores the model and returns the step to resume at.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'running on {device}')

    cfg = load_cfg(cfg_name)
    log = Logger(device=device)
    envs = make_vec_envs(**cfg['env'])
    model, n_start = init_model(cfg, envs, device, resume)

    runner = EnvRunner(
        rollout_size=cfg['train']['rollout_size'],
        envs=envs,
        model=model,
        device=device,
    )
    optim = ParamOptim(**cfg['optimizer'], params=model.parameters())
    agent = Agent(model=model, optim=optim, **cfg['agent'])

    train_cfg = cfg['train']
    cp_iter = train_cfg['checkpoint_every']
    log_iter = train_cfg['log_every']
    n_end = train_cfg['steps']
    cp_name = train_cfg['checkpoint_name']

    for n_iter, rollout in zip(trange(n_start, n_end), runner):
        agent_log = agent.update(rollout)

        if n_iter % log_iter == 0:
            log.output({**agent_log, **runner.get_logs()}, n_iter)

        # skip the very first step (incl. the step we resumed at) so we
        # don't immediately rewrite an existing checkpoint
        if n_iter > n_start and n_iter % cp_iter == 0:
            torch.save(model.state_dict(),
                       cp_name.format(n_iter=n_iter // cp_iter))
Example #2
def train(cfg_name, env_name):
    """Train an actor-critic agent on *env_name* using config *cfg_name*.

    ``'OT'`` selects the Obstacle Tower env; any other value is treated
    as an Atari id and suffixed with ``NoFrameskip-v4``. Logs during
    training, then records a final evaluation reward and closes the
    logger.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'running on {device}')

    cfg = load_cfg(cfg_name)
    log = Logger(device=device)

    num_env = cfg['train']['num_env']
    if env_name == 'OT':
        envs = make_obstacle_tower(num_env)
    else:
        envs = make_vec_envs(env_name + 'NoFrameskip-v4', num_env)

    emb = cfg['embedding']
    model = ActorCritic(
        output_size=envs.action_space.n,
        device=device,
        emb_size=emb['size'],
    )
    model.train().to(device=device)

    runner = EnvRunner(rollout_size=cfg['train']['rollout_size'],
                       envs=envs,
                       model=model,
                       device=device,
                       emb_stack=emb['history_size'])

    optim = ParamOptim(**cfg['optimizer'], params=model.parameters())
    agent = Agent(model=model, optim=optim, **cfg['agent'])

    log_iter = cfg['train']['log_every']
    n_end = cfg['train']['steps']

    log.log.add_text('env', env_name)

    for n_iter, rollout in zip(trange(0, n_end), runner):
        # linear progress in [0, 1) drives lr / loss-coef schedules
        progress = n_iter / n_end
        optim.update(progress)
        agent_log = agent.update(rollout, progress)
        if n_iter % log_iter == 0:
            log.output({**agent_log, **runner.get_logs()}, n_iter)

    reward = eval_model(model, envs, emb['history_size'], emb['size'], device)
    log.log.add_text('final', f'{reward.mean():.2f} ± {reward.std():.2f}')
    log.log.close()
Example #3
def train(cfg_name):
    """Train a Q-learning agent from ``config/{cfg_name}.yaml``.

    Optionally resumes from a saved checkpoint, then runs the
    environment/replay loop with decaying epsilon: env steps fill the
    buffer, the agent updates once the buffer passes its warmup size,
    and logs/checkpoints are written on their configured intervals.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print(f'running on {device}')
    with open(f'config/{cfg_name}.yaml') as f:
        # yaml.load() without an explicit Loader is unsafe on untrusted
        # input, warns on PyYAML 5.x, and raises TypeError on PyYAML >= 6;
        # safe_load is the correct call for plain config data.
        cfg = yaml.safe_load(f)

    # resume: pick up step counter and checkpoint file if configured
    if cfg['train'].get('resume', False):
        n_start, fname = find_checkpoint(cfg)
    else:
        n_start, fname = 0, None

    eps = DecayingEpsilon(**cfg['exploration'], n_iter=n_start)
    log = Logger(eps=eps, device=device)
    envs = get_envs(cfg, device)
    buffer = get_buffer(cfg, envs, device)
    agent = get_agent(cfg, envs, device)
    if fname:
        agent.load(fname)
        agent.init_target()
    env_runner = EnvRunner(envs=envs, qf=agent.qf, device=device, eps=eps)

    cp_iter = cfg['train']['checkpoint_every']
    log_iter = cfg['train']['log_every']
    counter = trange(n_start, cfg['train']['steps'] + 1)

    for n_iter, env_step in zip(counter, env_runner):
        eps.update(n_iter)
        buffer.append(env_step)
        # episode boundary: env step carries terminal episode stats
        if env_step.get('ep_info') is not None:
            log.output(env_step['ep_info'], n_iter)

        # step_lr returns the new lr only when the schedule changed it
        lr = agent.optim.step_lr(n_iter)
        if lr:
            log.output({'lr': lr}, n_iter)

        # don't start learning until the buffer has enough transitions
        if len(buffer) > cfg['buffer']['warmup']:
            to_log = agent.update(buffer.sample())
            if n_iter % log_iter == 0:
                log.stats(n_iter)
                log.output(to_log, n_iter)

        # skip the resume step itself so its checkpoint isn't rewritten
        if n_iter > n_start and n_iter % cp_iter == 0:
            fname = cfg['train']['checkpoint_name'].format(n_iter=n_iter //
                                                           cp_iter)
            agent.save(fname)