Ejemplo n.º 1
0
def train_val(args, agent, train_env, val_envs):
    ''' Train on the training set, and validate on seen and unseen splits.

    Builds one Adam optimizer per module in the group selected by
    ``args.grad`` and hands them to ``_train``, optionally preceded by a
    pretraining pass that reuses the same optimizers.

    Args:
        args: run configuration; this function reads ``args.grad``,
            ``args.use_pretraining``, ``args.n_pretrain_iters`` and
            ``args.n_iters``.
        agent: object exposing ``encoder``, ``decoder``, ``prog_monitor``,
            ``scorer``, ``bt_button`` and ``modules()``.
        train_env: environment passed to ``_train`` for the main run.
        val_envs: validation environments, forwarded to ``_train``.

    Raises:
        KeyError: if ``args.grad`` is not a registered group name.

    NOTE(review): ``learning_rate``, ``weight_decay`` and ``pretrain_env``
    are not parameters or locals here; presumably they are module-level
    names defined elsewhere in the file -- confirm.
    '''

    # Named groups of modules; args.grad picks exactly one of them.
    m_dict = {
        'follower': [agent.encoder, agent.decoder],
        'pm': [agent.prog_monitor],
        'follower+pm': [agent.encoder, agent.decoder, agent.prog_monitor],
        'all': agent.modules(),
    }
    # Optional components only get a group when the agent has them set.
    if agent.scorer:
        m_dict['scorer_all'] = agent.scorer.modules()
        m_dict['scorer_scorer'] = [agent.scorer.scorer]

    if agent.bt_button:
        m_dict['bt_button'] = [agent.bt_button]

    # Filter each module once: the same result feeds both the emptiness
    # check and the optimizer. Modules for which filter_param returns
    # nothing get no optimizer at all.
    filtered = (filter_param(m) for m in m_dict[args.grad])
    optimizers = [
        optim.Adam(params, lr=learning_rate, weight_decay=weight_decay)
        for params in filtered
        if len(params)
    ]

    if args.use_pretraining:
        # The pretraining pass shares the optimizer list with the main run.
        _train(args, pretrain_env, agent, optimizers,
               args.n_pretrain_iters, val_envs=val_envs)

    _train(args, train_env, agent, optimizers,
           args.n_iters, val_envs=val_envs)
Ejemplo n.º 2
0
                    x_1.append(i)
                    x_2.append(j)
                    valid_points += 1
    batch_labels.append((x_1, x_2))

# valid_points is incremented once for every (i, j) pair appended above, so
# this reports how many pairs were collected.
print('valid points', valid_points)

###

from utils import filter_param

# Only the 'follower' group (encoder + decoder) is registered in this snippet.
m_dict = {
    'follower': [agent.encoder, agent.decoder],
}
# One Adam optimizer per module that still has entries after filter_param.
# The inner generator filters each module once, so the same result serves
# both the emptiness check and the optimizer, and no loop variable leaks
# into the module namespace.
optimizers = [
    optim.Adam(params, lr=0.0001, weight_decay=0.0005)
    for params in (filter_param(m) for m in m_dict['follower'])
    if len(params)
]

###


def eval(test_envs, agent):
    for env_name, (val_env, evaluator) in test_envs.items():
        agent.env = val_env
        if hasattr(agent, 'speaker') and agent.speaker:
            agent.speaker.env = val_env
        agent.search = True
        agent.search_logit = True
        agent.search_mean = False
        agent.search_early_stop = True