Code example #1
0
def run_maml(args):
    """Meta-training driver: fan work out to remote workers, fold gradients back.

    Builds a CNNPolicy sized from the env's spaces, starts every discovered
    worker asynchronously via Pyro4, then loops forever: collect worker
    gradients, step the meta-optimizer, and checkpoint the weights.

    Args:
        args: parsed CLI namespace with `config` (path) and `exp_name`.
    """
    config = utils.load_config(args.config)
    train_params = config["train_params"]

    # A throwaway env instance: we only need its observation/action spaces
    # to size the policy network, so it is closed right away.
    env = sonic_utils.make_from_config(config['env_params'], True)
    env.close()

    model = CNNPolicy(
        env.observation_space,
        env.action_space,
        train_params["vf_coef"],
        train_params["ent_coef"],
        train_params["lr_meta"],
        train_params["max_grad_norm"],
    )

    workers = find_workers("worker")
    init_workers(workers, config, model.get_weights())

    # Launch each worker's run() asynchronously; results arrive as futures.
    workers_results = {w: Pyro4.Future(w.run)() for w in workers}

    savedir = utils.prepare_exp_dir(config, args.exp_name)

    updates = 0
    while True:
        # Reset accumulated gradients before collecting the next round.
        model.optimizer.zero_grad()

        # Block until workers report back; their grads are added to the model.
        wait_run_end(workers_results, model)

        # Apply the aggregated gradient.
        model.optimizer.step()
        updates += 1

        # Rolling "last" checkpoint, if requested by the config.
        if config['log']['save_last']:
            model.save(savedir / 'last.pt')

        # Numbered checkpoint right after the first update, then periodically.
        if updates == 1 or updates % config['log']["save_interval"] == 0:
            model.save(savedir / '{}.pt'.format(updates))
Code example #2
0
File: train.py  Project: lwei0402/retro_contest
def train(config, exp_name='test'):
    """Train a CNN policy on a Sonic env with minibatch policy-gradient updates.

    Repeatedly pulls rollout segments from a trajectory generator, computes
    GAE value targets, runs several optimization epochs over shuffled
    minibatches, and periodically logs stats and saves checkpoints.

    Args:
        config: dict with 'train_params', 'env_params' and 'log' sections.
        exp_name: experiment name used for the checkpoint directory.

    Returns:
        deque of recent episode-info dicts (reward/length stats).
    """
    train_params = config['train_params']
    env_params = config['env_params']
    log_params = config["log"]

    # Checkpoint directory is optional: only created when a log dir is set.
    savedir = None
    if log_params["log_dir"] is not None:
        savedir = utils.prepare_exp_dir(config, exp_name)

    env = sonic_utils.make_from_config(env_params)

    model = CNNPolicy(env.observation_space, env.action_space,
                      train_params["vf_coef"], train_params["ent_coef"],
                      train_params["lr"], train_params["max_grad_norm"])

    # Optionally warm-start from saved weights (and optimizer state).
    if train_params["weights"] is not None:
        model.load(train_params["weights"], train_params["load_adam_params"])

    seg_gen = traj_segment_generator(model,
                                     env,
                                     train_params['n_steps'],
                                     sample=True)

    total_steps = 0
    updates = 0
    t0 = time()
    epinfobuf = deque(maxlen=train_params["ep_info_len"])
    seg_inds = np.arange(train_params['n_steps'])
    n_batches = train_params["n_steps"] // train_params["batch_size"]
    loss_vals = []
    while total_steps <= train_params['max_steps']:
        # Collect one rollout segment and compute GAE value targets in place.
        seg = next(seg_gen)
        add_vtarg(seg, train_params['gamma'], train_params['lam'])

        # Accumulate episode stats for the rolling logging window.
        epinfobuf.extend(seg['ep_infos'])

        # Several optimization epochs over shuffled minibatches of the segment.
        for _ in range(train_params["n_opt_epochs"]):
            np.random.shuffle(seg_inds)
            for i in range(n_batches):
                start = i * train_params["batch_size"]
                end = (i + 1) * train_params["batch_size"]
                inds = seg_inds[start:end]

                losses = model.train(train_params['cliprange'],
                                     seg['ob'][inds], seg['tdlamret'][inds],
                                     seg['ac'][inds], seg['vpred'][inds],
                                     seg["ac_logits"][inds])
                loss_vals.append([l.detach().numpy() for l in losses])

        total_steps += train_params['n_steps']
        updates += 1

        if log_params["log"] and (updates % log_params["log_interval"] == 0
                                  or updates == 1):

            tnow = time()
            fps = int(total_steps / (tnow - t0))
            logger.logkv("total_steps", total_steps)
            logger.logkv("nupdates", updates)
            logger.logkv("fps", fps)
            logger.logkv(
                'eprewmean',
                np.mean([epinfo['r'] for epinfo in epinfobuf
                         if 'r' in epinfo]))
            logger.logkv(
                'eprewmean_exp',
                np.mean([
                    epinfo['r_exp'] for epinfo in epinfobuf
                    if 'r_exp' in epinfo
                ]))
            logger.logkv(
                'eplenmean',
                np.mean([epinfo['l'] for epinfo in epinfobuf
                         if 'l' in epinfo]))
            logger.logkv('time_elapsed', tnow - t0)

            # Per-loss means accumulated since the previous log event.
            for loss_val, loss_name in zip(np.mean(loss_vals, axis=0),
                                           model.loss_names):
                logger.logkv(loss_name, loss_val)
            logger.dumpkvs()

            loss_vals.clear()
        elif not log_params["log"]:
            # BUGFIX: with logging disabled, loss_vals previously grew without
            # bound; it is never read in that case, so drop it each update.
            loss_vals.clear()

        # Rolling "last" checkpoint, if requested and a save dir exists.
        if log_params['save_last'] and savedir is not None:
            model.save(savedir / 'last.pt')

        # Numbered checkpoint after the first update, then periodically.
        if (updates == 1 or updates % log_params["save_interval"] == 0) \
                and savedir is not None:
            model.save(savedir / '{}.pt'.format(updates))

    return epinfobuf