Example 1
def gen_avg_return(agent, env):
    '''Generate average return for agent and an env'''
    with util.ctx_lab_mode('eval'):  # enter eval context
        agent.algorithm.update()  # set explore_var etc. to end_val under ctx
    with torch.no_grad():
        ret = gen_return(agent, env)
    # exit eval context, restore variables simply by updating
    agent.algorithm.update()
    return ret
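The examples on this page all wrap evaluation in util.ctx_lab_mode('eval'). SLM-Lab's own implementation is not shown here; the following is only a minimal sketch, assuming such a context manager works by temporarily flipping a process-wide mode flag (a hypothetical lab_mode environment variable) and restoring the previous value on exit.

import os
from contextlib import contextmanager

@contextmanager
def ctx_lab_mode(lab_mode):
    '''Temporarily switch the lab mode (e.g. to 'eval'), restoring the old one on exit.'''
    prev_mode = os.environ.get('lab_mode')  # hypothetical flag name, assumed for illustration
    os.environ['lab_mode'] = lab_mode
    try:
        yield
    finally:
        # restore the previous mode even if the wrapped block raises
        if prev_mode is None:
            os.environ.pop('lab_mode', None)
        else:
            os.environ['lab_mode'] = prev_mode

Under such a flag, agent.algorithm.update() can pin explore_var to its end value while the 'eval' context is active; calling update() again after the with block, as the examples do, restores the scheduled training values.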
Example 2
def gen_avg_return(agent, env, num_eval=NUM_EVAL):
    '''Generate average return for agent and an env'''
    with util.ctx_lab_mode('eval'):  # enter eval context
        agent.algorithm.update()  # set explore_var etc. to end_val under ctx
        with torch.no_grad():
            returns = [gen_return(agent, env) for i in range(num_eval)]
    # exit eval context, restore variables simply by updating
    agent.algorithm.update()
    return np.mean(returns)
Example 3
    def __init__(self, spec, global_nets=None):
        self.spec = spec
        self.index = self.spec['meta']['session']
        util.set_random_seed(self.spec)
        util.set_cuda_id(self.spec)
        util.set_logger(self.spec, logger, 'session')
        spec_util.save(spec, unit='session')

        self.agent, self.env = make_agent_env(self.spec, global_nets)
        with util.ctx_lab_mode('eval'):  # env for eval
            self.eval_env = make_env(self.spec)
        logger.info(util.self_desc(self))
Example 4
    def run_eval_episode(self):
        with util.ctx_lab_mode('eval'):  # enter eval context
            self.agent.algorithm.update()  # set explore_var etc. to end_val under ctx
            self.eval_env.clock.tick('epi')
            logger.info(f'Running eval episode for trial {self.info_space.get("trial")} session {self.index}')
            total_reward = 0
            reward, state, done = self.eval_env.reset()
            while not done:
                self.eval_env.clock.tick('t')
                action = self.agent.act(state)
                reward, state, done = self.eval_env.step(action)
                total_reward += reward
        # exit eval context, restore variables simply by updating
        self.agent.algorithm.update()
        # update body.eval_df
        self.agent.body.eval_update(self.eval_env, total_reward)
        self.agent.body.log_summary(body_df_kind='eval')
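run_eval_episode above is a standard rollout loop: reset the environment, then repeatedly act, step, and accumulate reward until done. SLM-Lab's env wrapper returns (reward, state, done) tuples; as a self-contained reference, here is the same loop sketched against the plain Gymnasium API with a random policy standing in for agent.act. This is an illustration only, not SLM-Lab code.

import gymnasium as gym

def run_eval_episode(env_id='CartPole-v1'):
    '''Roll out one episode with a random policy and return its total reward.'''
    env = gym.make(env_id)
    state, info = env.reset()
    total_reward, done = 0.0, False
    while not done:
        action = env.action_space.sample()  # stand-in for agent.act(state)
        state, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        done = terminated or truncated
    env.close()
    return total_reward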
Example 5
    def __init__(self, spec, info_space, global_nets=None):
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_logger(self.spec, self.info_space, logger, 'session')
        self.data = None

        # init singleton agent and env
        self.env = make_env(self.spec)
        util.set_rand_seed(self.info_space.get_random_seed(), self.env)
        with util.ctx_lab_mode('eval'):  # env for eval
            self.eval_env = make_env(self.spec)
            util.set_rand_seed(self.info_space.get_random_seed(),
                               self.eval_env)
        util.try_set_cuda_id(self.spec, self.info_space)
        body = Body(self.env, self.spec['agent'])
        self.agent = Agent(self.spec,
                           self.info_space,
                           body=body,
                           global_nets=global_nets)

        enable_aeb_space(self)  # to use lab's data analysis framework
        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')