def __init__(self, spec, info_space):
    '''Construct the session: store spec/info_space, then build the singleton env-agent pair.'''
    self.spec = spec
    self.info_space = info_space
    self.index = self.info_space.get('session')
    # init singleton agent and env
    self.env = OpenAIEnv(self.spec)
    agent_body = Body(self.env, self.spec['agent'])
    self.agent = Agent(self.spec, self.info_space, body=agent_body)
    enable_aeb_space(self)  # to use lab's data analysis framework
    logger.info(f'Initialized session {self.index}')
def __init__(self, spec, info_space, global_nets=None):
    '''
    Initialize the session: configure logging, seeding and CUDA, then build the
    singleton env and agent.

    spec: experiment spec dict; spec['agent'] configures the Body.
    info_space: supplies the session index and the random seed.
    global_nets: optional nets forwarded to Agent — presumably shared networks
        for distributed training; confirm against Agent's signature.
    '''
    self.spec = spec
    self.info_space = info_space
    self.index = self.info_space.get('session')
    util.set_session_logger(self.spec, self.info_space, logger)
    # seed modules before env/agent construction
    util.set_module_seed(self.info_space.get_random_seed())
    util.try_set_cuda_id(self.spec, self.info_space)
    self.data = None  # session data placeholder; not populated by this init
    # init singleton agent and env
    self.env = make_env(self.spec)
    body = Body(self.env, self.spec['agent'])
    self.agent = Agent(self.spec, self.info_space, body=body, global_nets=global_nets)
    enable_aeb_space(self)  # to use lab's data analysis framework
    logger.info(util.self_desc(self))
    logger.info(f'Initialized session {self.index}')
def __init__(self, spec, info_space, global_nets=None):
    '''Build the session: logging, env, seeding, CUDA, agent, and data-analysis hookup.'''
    self.spec = spec
    self.info_space = info_space
    self.index = self.info_space.get('session')
    util.set_logger(self.spec, self.info_space, logger, 'session')
    self.data = None
    self.eval_proc = None  # reference run_online_eval process
    # init singleton agent and env
    self.env = make_env(self.spec)
    agent_body = Body(self.env, self.spec['agent'])
    util.set_rand_seed(self.info_space.get_random_seed(), self.env)
    util.try_set_cuda_id(self.spec, self.info_space)
    self.agent = Agent(self.spec, self.info_space, body=agent_body, global_nets=global_nets)
    self.sim_env = self.spec['sim_env']
    enable_aeb_space(self)  # to use lab's data analysis framework
    logger.info(util.self_desc(self))
    logger.info(f'Initialized session {self.index}')
class Session:
    '''The class which initializes the agent, environment, and runs them.'''

    def __init__(self, spec, info_space):
        '''
        spec: experiment spec dict; spec['agent'] configures the Body.
        info_space: provides the session index.
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        # fix: init self.data so the attribute exists even before run() is
        # called, consistent with the other Session variants in this file
        self.data = None
        # init singleton agent and env
        self.env = OpenAIEnv(self.spec)
        body = Body(self.env, self.spec['agent'])
        self.agent = Agent(self.spec, self.info_space, body=body)
        enable_aeb_space(self)  # to use lab's data analysis framework
        logger.info(f'Initialized session {self.index}')

    def run_episode(self):
        '''Run one episode: reset env and agent, then step the env until done.'''
        self.env.clock.tick('epi')
        reward, state, done = self.env.reset()
        self.agent.reset(state)
        while not done:
            self.env.clock.tick('t')
            action = self.agent.act(state)
            reward, state, done = self.env.step(action)
            self.agent.update(action, reward, state, done)
        self.agent.body.log_summary()

    def close(self):
        '''Close the agent and the env, then log completion.'''
        self.agent.close()
        self.env.close()
        logger.info('Session done and closed.')

    def run(self, time_limit):
        '''
        Run episodes until time_limit (seconds of wall-clock) elapses, then
        analyze, close, and return (session data, agent).
        The limit is checked between episodes, so a running episode completes.
        '''
        deadline = time.time() + time_limit  # hoist loop-invariant deadline
        while time.time() < deadline:
            self.run_episode()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data, self.agent

    def update_env(self, env):
        '''Swap the wrapped env (u_env) on both the session env and the agent body's env.'''
        self.env.u_env = env
        self.agent.body.env.u_env = env
def make_agent_env(spec, global_nets=None):
    '''Construct and return the (agent, env) pair described by spec.'''
    environment = make_env(spec)
    agent_body = Body(environment, spec['agent'])
    return Agent(spec, body=agent_body, global_nets=global_nets), environment
class Session:
    '''
    The base unit of instantiated RL system.
    Given a spec,
    session creates agent(s) and environment(s),
    run the RL system and collect data, e.g. fitness metrics, till it ends,
    then return the session data.
    '''

    def __init__(self, spec, info_space, global_nets=None):
        self.spec = spec
        self.info_space = info_space
        # session index within the trial, used in log messages below
        self.index = self.info_space.get('session')
        util.set_session_logger(self.spec, self.info_space, logger)
        self.data = None  # set by run() from analysis.analyze_session
        # init singleton agent and env
        self.env = make_env(self.spec)
        body = Body(self.env, self.spec['agent'])
        # env is passed to the seeder so it can be seeded alongside the modules
        util.set_rand_seed(self.info_space.get_random_seed(), self.env)
        util.try_set_cuda_id(self.spec, self.info_space)
        # this Session drives a single agent, so a list of global_nets is rejected
        assert not ps.is_list(global_nets), f'single agent global_nets must be a dict, got {global_nets}'
        self.agent = Agent(self.spec, self.info_space, body=body, global_nets=global_nets)
        enable_aeb_space(self)  # to use lab's data analysis framework
        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def save_if_ckpt(self, agent, env):
        '''Save for agent, env if episode is at checkpoint'''
        tick = env.clock.get(env.max_tick_unit)
        # only checkpoint strictly inside the run (0 < tick < max_tick) and
        # when the env has a save_frequency configured
        if hasattr(env, 'save_frequency') and 0 < tick < env.max_tick:
            if env.max_tick_unit == 'epi':
                # for episodic ticking, only save at an episode boundary
                to_save = (env.done and tick % env.save_frequency == 0)
            else:
                to_save = (tick % env.save_frequency == 0)
        else:
            to_save = False
        if to_save:
            agent.save(ckpt='last')
            # additionally save a 'best' checkpoint when performance improved
            if analysis.new_best(agent):
                agent.save(ckpt='best')
            analysis.analyze_session(self)

    def run_episode(self):
        self.env.clock.tick('epi')
        logger.info(f'Running trial {self.info_space.get("trial")} session {self.index} episode {self.env.clock.get("epi")}')
        reward, state, done = self.env.reset()
        self.agent.reset(state)
        while not done:
            self.env.clock.tick('t')
            action = self.agent.act(state)
            reward, state, done = self.env.step(action)
            self.agent.update(action, reward, state, done)
            # checkpoint check each step; save_if_ckpt itself gates on frequency/done
            self.save_if_ckpt(self.agent, self.env)
        self.agent.body.log_summary()

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent.close()
        self.env.close()
        logger.info('Session done and closed.')

    def run(self):
        # run episodes until the configured max tick is reached
        while self.env.clock.get(self.env.max_tick_unit) < self.env.max_tick:
            self.run_episode()
            # early exit when all environments are solved (skipped in enjoy/eval modes)
            if util.get_lab_mode() not in ('enjoy', 'eval') and analysis.all_solved(self.agent):
                logger.info('All environments solved. Early exit.')
                break
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
class Session:
    '''
    The base unit of instantiated RL system.
    Given a spec,
    session creates agent(s) and environment(s),
    run the RL system and collect data, e.g. fitness metrics, till it ends,
    then return the session data.
    '''

    def __init__(self, spec, info_space, global_nets=None):
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_logger(self.spec, self.info_space, logger, 'session')
        self.data = None  # set by run() from analysis.analyze_session
        self.eval_proc = None  # reference run_online_eval process
        # init singleton agent and env
        self.env = make_env(self.spec)
        body = Body(self.env, self.spec['agent'])
        # env is passed to the seeder so it can be seeded alongside the modules
        util.set_rand_seed(self.info_space.get_random_seed(), self.env)
        util.try_set_cuda_id(self.spec, self.info_space)
        self.agent = Agent(self.spec, self.info_space, body=body, global_nets=global_nets)
        enable_aeb_space(self)  # to use lab's data analysis framework
        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def try_ckpt(self, agent, env):
        '''Try to checkpoint agent and run_online_eval at the start, save_freq, and the end'''
        clock = env.clock
        tick = clock.get(env.max_tick_unit)
        to_ckpt = False
        # no checkpointing in enjoy/eval modes, nor past max_tick
        if util.get_lab_mode() not in ('enjoy', 'eval') and tick <= env.max_tick:
            to_ckpt = (tick % env.save_frequency == 0) or tick == env.max_tick
        if env.max_tick_unit == 'epi':  # extra condition for epi
            to_ckpt = to_ckpt and env.done
        if to_ckpt:
            if analysis.new_best(agent):
                agent.save(ckpt='best')
            # run online eval for train mode
            if util.get_lab_mode() == 'train' and self.spec['meta'].get('training_eval', False):
                ckpt = f'epi{clock.epi}-totalt{clock.total_t}'
                agent.save(ckpt=ckpt)
                # set reference to eval process for handling
                self.eval_proc = analysis.run_online_eval(self.spec, self.info_space, ckpt)
            if tick > 0:  # nothing to analyze at start
                analysis.analyze_session(self)

    def run_episode(self):
        self.env.clock.tick('epi')
        logger.info(f'Running trial {self.info_space.get("trial")} session {self.index} episode {self.env.clock.epi}')
        reward, state, done = self.env.reset()
        self.agent.reset(state)
        while not done:
            # ckpt at loop head so the start-of-run tick is also considered
            self.try_ckpt(self.agent, self.env)
            self.env.clock.tick('t')
            action = self.agent.act(state)
            reward, state, done = self.env.step(action)
            self.agent.update(action, reward, state, done)
        self.try_ckpt(self.agent, self.env)  # final timestep ckpt
        self.agent.body.log_summary()

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent.close()
        self.env.close()
        logger.info('Session done and closed.')

    def run(self):
        while self.env.clock.get(self.env.max_tick_unit) < self.env.max_tick:
            self.run_episode()
            # early exit when all environments are solved (skipped in enjoy/eval modes)
            if util.get_lab_mode() not in ('enjoy', 'eval') and analysis.all_solved(self.agent):
                logger.info('All environments solved. Early exit.')
                break
        if self.eval_proc is not None:  # wait for final eval before closing
            util.run_cmd_wait(self.eval_proc)
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
class Session:
    '''
    The base unit of instantiated RL system.
    Given a spec,
    session creates agent(s) and environment(s),
    run the RL system and collect data, e.g. fitness metrics, till it ends,
    then return the session data.
    '''

    def __init__(self, spec, info_space, global_nets=None):
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_logger(self.spec, self.info_space, logger, 'session')
        self.data = None  # set by run() from analysis.analyze_session
        # init singleton agent and env
        self.env = make_env(self.spec)
        util.set_rand_seed(self.info_space.get_random_seed(), self.env)
        with util.ctx_lab_mode('eval'):  # env for eval
            self.eval_env = make_env(self.spec)
            util.set_rand_seed(self.info_space.get_random_seed(), self.eval_env)
        util.try_set_cuda_id(self.spec, self.info_space)
        body = Body(self.env, self.spec['agent'])
        self.agent = Agent(self.spec, self.info_space, body=body, global_nets=global_nets)
        enable_aeb_space(self)  # to use lab's data analysis framework
        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def try_ckpt(self, agent, env):
        '''Try to checkpoint agent at the start, save_freq, and the end'''
        tick = env.clock.get(env.max_tick_unit)
        to_ckpt = False
        # no checkpointing in eval lab modes, nor past max_tick
        if not util.in_eval_lab_modes() and tick <= env.max_tick:
            to_ckpt = (tick % env.eval_frequency == 0) or tick == env.max_tick
        if env.max_tick_unit == 'epi':  # extra condition for epi
            to_ckpt = to_ckpt and env.done
        if to_ckpt:
            # evaluate either in a parallel process or inline on eval_env
            if self.spec['meta'].get('parallel_eval'):
                retro_analysis.run_parallel_eval(self, agent, env)
            else:
                self.run_eval_episode()
            if analysis.new_best(agent):
                agent.save(ckpt='best')
            if tick > 0:  # nothing to analyze at start
                analysis.analyze_session(self, eager_analyze_trial=True)

    def run_eval_episode(self):
        '''Run one evaluation episode on eval_env and record its total reward.'''
        with util.ctx_lab_mode('eval'):  # enter eval context
            self.agent.algorithm.update()  # set explore_var etc. to end_val under ctx
            self.eval_env.clock.tick('epi')
            logger.info(f'Running eval episode for trial {self.info_space.get("trial")} session {self.index}')
            total_reward = 0
            reward, state, done = self.eval_env.reset()
            while not done:
                self.eval_env.clock.tick('t')
                action = self.agent.act(state)
                reward, state, done = self.eval_env.step(action)
                total_reward += reward
        # exit eval context, restore variables simply by updating
        self.agent.algorithm.update()
        # update body.eval_df
        self.agent.body.eval_update(self.eval_env, total_reward)
        self.agent.body.log_summary(body_df_kind='eval')

    def run_episode(self):
        self.env.clock.tick('epi')
        logger.info(f'Running trial {self.info_space.get("trial")} session {self.index} episode {self.env.clock.epi}')
        reward, state, done = self.env.reset()
        self.agent.reset(state)
        while not done:
            # ckpt at loop head so the start-of-run tick is also considered
            self.try_ckpt(self.agent, self.env)
            self.env.clock.tick('t')
            action = self.agent.act(state)
            reward, state, done = self.env.step(action)
            self.agent.update(action, reward, state, done)
        self.try_ckpt(self.agent, self.env)  # final timestep ckpt
        self.agent.body.log_summary(body_df_kind='train')

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent.close()
        self.env.close()
        self.eval_env.close()
        logger.info('Session done and closed.')

    def run(self):
        while self.env.clock.get(self.env.max_tick_unit) < self.env.max_tick:
            self.run_episode()
        # ensure any outstanding parallel eval finishes before final analysis
        retro_analysis.try_wait_parallel_eval(self)
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
class Session:
    '''
    The base unit of instantiated RL system.
    From a spec, a session builds its agent(s) and environment(s), runs the
    RL loop collecting data (e.g. fitness metrics) until it ends, and then
    returns the session data.
    '''

    def __init__(self, spec, info_space, global_nets=None):
        '''Store spec/info_space, set up logging, seeding, CUDA, then build env and agent.'''
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_session_logger(self.spec, self.info_space, logger)
        util.set_module_seed(self.info_space.get_random_seed())
        util.try_set_cuda_id(self.spec, self.info_space)
        self.data = None
        # init singleton agent and env
        self.env = make_env(self.spec)
        agent_body = Body(self.env, self.spec['agent'])
        self.agent = Agent(self.spec, self.info_space, body=agent_body, global_nets=global_nets)
        enable_aeb_space(self)  # to use lab's data analysis framework
        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def save_if_ckpt(self, agent, env):
        '''Save for agent, env if episode is at checkpoint'''
        epi = env.clock.get('epi')
        # guard clauses replicate the original short-circuit condition chain
        if not (env.done and epi != env.max_episode and epi > 0):
            return
        if not hasattr(env, 'save_epi_frequency') or epi % env.save_epi_frequency != 0:
            return
        agent.save(ckpt='last')
        analysis.analyze_session(self)

    def run_episode(self):
        '''Tick the episode clock, run one env episode, then log and maybe checkpoint.'''
        self.env.clock.tick('epi')
        logger.info(f'Running trial {self.info_space.get("trial")} session {self.index} episode {self.env.clock.get("epi")}')
        reward, state, done = self.env.reset()
        self.agent.reset(state)
        while not done:
            self.env.clock.tick('t')
            action = self.agent.act(state)
            reward, state, done = self.env.step(action)
            self.agent.update(action, reward, state, done)
        self.agent.body.log_summary()
        self.save_if_ckpt(self.agent, self.env)

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent.close()
        self.env.close()
        logger.info('Session done and closed.')

    def run(self):
        '''Run episodes until max_episode, then analyze, close, and return session data.'''
        while self.env.clock.get('epi') < self.env.max_episode:
            self.run_episode()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data