class SpaceSession(Session):
    '''Session for multi-agent/env setting'''

    def __init__(self, spec, info_space, global_nets=None):
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_session_logger(self.spec, self.info_space, logger)
        self.data = None

        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.aeb_space.init_body_space()
        util.set_rand_seed(self.info_space.get_random_seed(), self.env_space)
        util.try_set_cuda_id(self.spec, self.info_space)
        assert not ps.is_dict(global_nets), f'multi-agent global_nets must be a list of dicts, got {global_nets}'
        self.agent_space = AgentSpace(self.spec, self.aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def save_if_ckpt(self, agent_space, env_space):
        '''Save each agent, env pair if the episode is at a checkpoint'''
        for agent in agent_space.agents:
            for body in agent.nanflat_body_a:
                env = body.env
                super(SpaceSession, self).save_if_ckpt(agent, env)

    def run_all_episodes(self):
        '''
        Continually run all episodes, where each env can step and reset at its own clock_speed and timeline.
        Will terminate when all envs are done.
        '''
        all_done = self.aeb_space.tick('epi')
        reward_space, state_space, done_space = self.env_space.reset()
        self.agent_space.reset(state_space)
        while not all_done:
            all_done = self.aeb_space.tick()
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(action_space)
            self.agent_space.update(action_space, reward_space, state_space, done_space)
            self.save_if_ckpt(self.agent_space, self.env_space)

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done and closed.')

    def run(self):
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
class Session:
    '''
    The base unit of an instantiated RL system.
    Given a spec, a session creates the agent(s) and environment(s), runs the RL system and collects data,
    e.g. fitness metrics, until it ends, then returns the session data.
    '''

    def __init__(self, spec, info_space=None):
        info_space = info_space or InfoSpace()
        init_thread_vars(spec, info_space, unit='session')
        self.spec = deepcopy(spec)
        self.info_space = info_space
        self.coor, self.index = self.info_space.get_coor_idx(self)
        self.random_seed = 100 * (info_space.get('trial') or 0) + self.index
        torch.cuda.manual_seed_all(self.random_seed)
        torch.manual_seed(self.random_seed)
        np.random.seed(self.random_seed)
        self.data = None

        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.agent_space = AgentSpace(self.spec, self.aeb_space)
        logger.info(util.self_desc(self))
        self.aeb_space.init_body_space()
        self.aeb_space.post_body_init()
        logger.info(f'Initialized session {self.index}')

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        Prepare self.df.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done, closing.')

    def run_all_episodes(self):
        '''
        Run all episodes, where each env can step and reset at its own clock_speed and timeline.
        Will terminate when all envs are done running max_episode.
        '''
        _reward_space, state_space, _done_space = self.env_space.reset()
        _action_space = self.agent_space.reset(state_space)  # nan action at t=0 for bookkeeping in data_space
        while True:
            end_session = self.aeb_space.tick_clocks(self)
            if end_session:
                break
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(action_space)
            self.agent_space.update(action_space, reward_space, state_space, done_space)

    def run(self):
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
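# A minimal, self-contained sketch of the act -> step -> update flow that
# run_all_episodes drives over the AEB spaces. ToyAgentSpace and ToyEnvSpace are
# hypothetical stand-ins (not SLM Lab classes): the real spaces carry
# per-(agent, env, body) data and clock ticks rather than the scalars used here.
import random


class ToyEnvSpace:
    def __init__(self, max_tick=100):
        self.t, self.max_tick = 0, max_tick

    def reset(self):
        '''Return (reward, state, done), mirroring env_space.reset()'''
        self.t = 0
        return 0.0, random.random(), False

    def step(self, action):
        self.t += 1
        return float(action), random.random(), self.t >= self.max_tick


class ToyAgentSpace:
    def reset(self, state):
        pass  # bookkeeping only; no real action at t=0

    def act(self, state):
        return int(state > 0.5)  # trivial policy

    def update(self, action, reward, state, done):
        pass  # a real agent would store the experience and train here


def toy_run_all_episodes(agent_space, env_space):
    _reward, state, done = env_space.reset()
    agent_space.reset(state)
    while not done:  # the AEB clock tick is reduced to a single done flag here
        action = agent_space.act(state)
        _reward, state, done = env_space.step(action)
        agent_space.update(action, _reward, state, done)


toy_run_all_episodes(ToyAgentSpace(), ToyEnvSpace())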
class Session:
    '''
    The base unit of an instantiated RL system.
    Given a spec, a session creates the agent(s) and environment(s), runs the RL system and collects data,
    e.g. fitness metrics, until it ends, then returns the session data.
    '''

    def __init__(self, spec, info_space=InfoSpace()):
        self.spec = spec
        if info_space.get('session') is None:
            info_space.tick('session')
        self.info_space = info_space
        self.coor, self.index = self.info_space.get_coor_idx(self)
        # TODO option to set rand_seed; also set np random seed
        self.torch_rand_seed = torch.initial_seed()
        self.data = None

        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.agent_space = AgentSpace(self.spec, self.aeb_space)
        logger.info(util.self_desc(self))
        self.aeb_space.init_body_space()
        self.aeb_space.post_body_init()

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        Prepare self.df.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done, closing.')

    def run_all_episodes(self):
        '''
        Run all episodes, where each env can step and reset at its own clock_speed and timeline.
        Will terminate when all envs are done running max_episode.
        '''
        _reward_space, state_space, _done_space = self.env_space.reset()
        self.agent_space.reset(state_space)
        while True:
            end_session = self.aeb_space.tick_clocks()
            if end_session:
                break
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(action_space)
            self.agent_space.update(action_space, reward_space, state_space, done_space)

    def run(self):
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
class SpaceSession(Session):
    '''Session for multi-agent/env setting'''

    def __init__(self, spec, info_space, global_nets=None):
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_logger(self.spec, self.info_space, logger, 'session')
        self.data = None
        self.eval_proc = None  # reference to the run_online_eval process

        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.aeb_space.init_body_space()
        util.set_rand_seed(self.info_space.get_random_seed(), self.env_space)
        util.try_set_cuda_id(self.spec, self.info_space)
        self.agent_space = AgentSpace(self.spec, self.aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def try_ckpt(self, agent_space, env_space):
        '''Try to checkpoint the agents and run_online_eval at the start, every save_freq, and at the end'''
        for agent in agent_space.agents:
            for body in agent.nanflat_body_a:
                env = body.env
                super(SpaceSession, self).try_ckpt(agent, env)

    def run_all_episodes(self):
        '''
        Continually run all episodes, where each env can step and reset at its own clock_speed and timeline.
        Will terminate when all envs are done.
        '''
        all_done = self.aeb_space.tick('epi')
        reward_space, state_space, done_space = self.env_space.reset()
        self.agent_space.reset(state_space)
        while not all_done:
            self.try_ckpt(self.agent_space, self.env_space)
            all_done = self.aeb_space.tick()
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(action_space)
            self.agent_space.update(action_space, reward_space, state_space, done_space)
        self.try_ckpt(self.agent_space, self.env_space)
        if self.eval_proc is not None:  # wait for the final eval before closing
            util.run_cmd_wait(self.eval_proc)

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done and closed.')

    def run(self):
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
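# Hypothetical launch sketch for the SpaceSession above. It assumes SLM Lab's
# usual helpers (spec_util.get, InfoSpace) and module paths, which may differ
# between versions of the codebase; normally the run_lab entry point performs
# these steps. Treat every name here as an assumption, not the canonical API.
from slm_lab.experiment.control import SpaceSession
from slm_lab.experiment.monitor import InfoSpace
from slm_lab.spec import spec_util

spec = spec_util.get('demo.json', 'dqn_cartpole')  # assumed spec file/name
info_space = InfoSpace()
info_space.tick('trial')
info_space.tick('session')

session = SpaceSession(spec, info_space, global_nets=None)  # None: train local nets
session_data = session.run()  # analyzed session data, e.g. fitness metrics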