def gen_avg_return(agent, env, num_eval=NUM_EVAL):
    '''
    Generate the average return for an agent on an env over num_eval episodes.
    Runs under the 'eval' lab-mode context with gradients disabled, then restores
    the algorithm's variables by calling update() again outside the context.
    @param agent: agent whose algorithm is toggled to eval settings and evaluated
    @param env: environment to run evaluation episodes on
    @param int:num_eval number of evaluation episodes to average over
    @returns float: mean of the per-episode returns from gen_return
    '''
    with util.ctx_lab_mode('eval'):  # enter eval context
        agent.algorithm.update()  # set explore_var etc. to end_val under ctx
        with torch.no_grad():  # no gradient tracking needed during evaluation
            returns = [gen_return(agent, env) for _ in range(num_eval)]
    # exit eval context, restore variables simply by updating
    agent.algorithm.update()
    return np.mean(returns)
def gen_result(agent, env):
    '''
    Generate a single episode return for an agent on an env.
    (Docstring fixed: previously a copy-paste of gen_avg_return's "average return"
    text, but this runs gen_return exactly once — no averaging.)
    Runs under the 'eval' lab-mode context with gradients disabled, then restores
    the algorithm's variables by calling update() again outside the context.
    @param agent: agent whose algorithm is toggled to eval settings and evaluated
    @param env: environment to run the evaluation episode on
    @returns the return of one episode from gen_return
    '''
    with util.ctx_lab_mode('eval'):  # enter eval context
        agent.algorithm.update()  # set explore_var etc. to end_val under ctx
        with torch.no_grad():  # no gradient tracking needed during evaluation
            _return = gen_return(agent, env)
    # exit eval context, restore variables simply by updating
    agent.algorithm.update()
    return _return
def __init__(self, spec, global_nets=None):
    '''
    Initialize a session from a spec dict: seed RNGs, configure CUDA and
    logging, persist the spec, then build the agent, training env, and a
    separate evaluation env.
    @param dict:spec experiment spec; must contain 'meta.session' (used as
        this session's index) and is read by the util/spec_util setup calls
    @param global_nets: optional shared networks passed through to
        make_agent_env (e.g. for asynchronous/parallel training)
    '''
    self.spec = spec
    self.index = self.spec['meta']['session']  # session index from the spec's meta block
    # global side effects: seed RNGs, select CUDA device, configure logger
    util.set_random_seed(self.spec)
    util.set_cuda_id(self.spec)
    util.set_logger(self.spec, logger, 'session')
    spec_util.save(spec, unit='session')  # persist the spec for this session
    self.agent, self.env = make_agent_env(self.spec, global_nets)
    # NOTE(review): with-block extent reconstructed from collapsed source —
    # either placement of the eval_env assignments is behaviorally equivalent,
    # since the context only toggles the lab mode; confirm against upstream.
    with util.ctx_lab_mode('eval'):  # env for eval
        self.eval_env = make_env(self.spec)
        self.agent.body.eval_env = self.eval_env  # let the agent's body access the eval env
    self.num_eval = ps.get(self.agent.spec, 'meta.num_eval')  # episodes per evaluation
    # warmup episodes before regular training behavior; -1 when unspecified
    self.warmup_epi = ps.get(self.agent.agent_spec, 'algorithm.warmup_epi') or -1
    logger.info(util.self_desc(self))  # log a description of this session