예제 #1
0
class SpaceSession(Session):
    '''Session variant that drives multiple agents and environments through shared spaces.'''

    def __init__(self, spec, info_space, global_nets=None):
        '''
        Build the AEB, env and agent spaces for this session.
        @param spec: experiment spec, forwarded to every space constructor
        @param info_space: source of the session index and the random seed
        @param global_nets: forwarded to AgentSpace; must not be a single dict
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = info_space.get('session')
        util.set_session_logger(spec, info_space, logger)
        self.data = None

        aeb_space = AEBSpace(spec, info_space)
        self.aeb_space = aeb_space
        env_space = EnvSpace(spec, aeb_space)
        self.env_space = env_space
        aeb_space.init_body_space()
        # seeding and cuda selection run after the env space exists but before any agent is built
        util.set_rand_seed(info_space.get_random_seed(), env_space)
        util.try_set_cuda_id(spec, info_space)
        got_single_dict = ps.is_dict(global_nets)
        assert not got_single_dict, f'multi agent global_nets must be a list of dicts, got {global_nets}'
        self.agent_space = AgentSpace(spec, aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def save_if_ckpt(self, agent_space, env_space):
        '''
        Call the parent class save_if_ckpt once per body of every agent,
        pairing the agent with that body's env. The env_space argument is not read.
        '''
        for agent in agent_space.agents:
            for body in agent.nanflat_body_a:
                super().save_if_ckpt(agent, body.env)

    def run_all_episodes(self):
        '''
        Reset the env and agent spaces, then loop act -> step -> update -> checkpoint
        until the AEB space tick reports that everything is done.
        '''
        finished = self.aeb_space.tick('epi')
        _, state_space, _ = self.env_space.reset()
        self.agent_space.reset(state_space)
        while not finished:
            finished = self.aeb_space.tick()
            action_space = self.agent_space.act(state_space)
            step_result = self.env_space.step(action_space)
            reward_space, state_space, done_space = step_result
            self.agent_space.update(
                action_space, reward_space, state_space, done_space)
            self.save_if_ckpt(self.agent_space, self.env_space)

    def close(self):
        '''Close the agent space, then the env space, and log that the session is closed.'''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done and closed.')

    def run(self):
        '''Run all episodes, store the analysis result on self.data, close, and return self.data.'''
        self.run_all_episodes()
        # session fitness
        session_data = analysis.analyze_session(self)
        self.data = session_data
        self.close()
        return self.data
예제 #2
0
class Session:
    '''
    One instantiated run of the RL system.
    Built from a spec, it constructs the AEB, env and agent spaces,
    steps them until the clocks signal the end, analyzes the run,
    and returns the resulting session data.
    '''

    def __init__(self, spec, info_space=None):
        '''
        @param spec: experiment spec; a deep copy is kept on the session
        @param info_space: optional InfoSpace; a new one is created when a falsy value is given
        '''
        if not info_space:
            info_space = InfoSpace()
        init_thread_vars(spec, info_space, unit='session')
        self.spec = deepcopy(spec)
        self.info_space = info_space
        self.coor, self.index = info_space.get_coor_idx(self)
        # seed combines the trial value (0 when absent) with the session index
        trial_part = info_space.get('trial') or 0
        self.random_seed = 100 * trial_part + self.index
        seed = self.random_seed
        torch.cuda.manual_seed_all(seed)
        torch.manual_seed(seed)
        np.random.seed(seed)
        self.data = None
        aeb_space = AEBSpace(self.spec, info_space)
        self.aeb_space = aeb_space
        self.env_space = EnvSpace(self.spec, aeb_space)
        self.agent_space = AgentSpace(self.spec, aeb_space)
        logger.info(util.self_desc(self))
        # body space setup runs only after both env and agent spaces exist
        aeb_space.init_body_space()
        aeb_space.post_body_init()
        logger.info(f'Initialized session {self.index}')

    def close(self):
        '''Close the agent space, then the env space, and log that the session is closing.'''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done, closing.')

    def run_all_episodes(self):
        '''
        Reset the env and agent spaces, then loop act -> step -> update
        until aeb_space.tick_clocks signals the end of the session.
        '''
        _, state_space, _ = self.env_space.reset()
        # the value returned by the agent reset at t=0 is not used here
        self.agent_space.reset(state_space)
        while not self.aeb_space.tick_clocks(self):
            action_space = self.agent_space.act(state_space)
            step_result = self.env_space.step(action_space)
            reward_space, state_space, done_space = step_result
            self.agent_space.update(
                action_space, reward_space, state_space, done_space)

    def run(self):
        '''Run all episodes, store the analysis result on self.data, close, and return self.data.'''
        self.run_all_episodes()
        # session fitness
        session_data = analysis.analyze_session(self)
        self.data = session_data
        self.close()
        return self.data
예제 #3
0
def test_aeb_space(test_spec):
    '''
    Return the module-level AEBSpace, building it from test_spec on first use.
    Later calls return the cached instance without constructing anything.
    '''
    global aeb_space
    if aeb_space is not None:
        return aeb_space
    aeb_space = AEBSpace(test_spec, InfoSpace())
    # env and agent spaces are built against the new AEB space before its
    # body space is initialized; neither is used directly afterwards
    _env_space = EnvSpace(test_spec, aeb_space)
    _agent_space = AgentSpace(test_spec, aeb_space)
    aeb_space.init_body_space()
    return aeb_space
예제 #4
0
def test_aeb_space(test_spec):
    '''
    Lazily create the shared module-level AEBSpace from test_spec and return it.
    When it already exists it is returned unchanged.
    '''
    global aeb_space
    already_built = aeb_space is not None
    if already_built:
        return aeb_space
    aeb_space = AEBSpace(test_spec, InfoSpace())
    # constructed only for their effect on the AEB space; kept bound until return
    _envs = EnvSpace(test_spec, aeb_space)
    _agents = AgentSpace(test_spec, aeb_space)
    aeb_space.init_body_space()
    return aeb_space
예제 #5
0
class Session:
    '''
    A single run of the RL system described by a spec.
    The session builds the AEB, env and agent spaces, steps them until
    the clocks end the session, analyzes the outcome, and hands back
    the session data.
    '''

    def __init__(self, spec, info_space=None):
        '''
        @param spec: experiment spec; stored as a deep copy
        @param info_space: optional InfoSpace; replaced by a new one when falsy
        '''
        info_space = info_space if info_space else InfoSpace()
        init_thread_vars(spec, info_space, unit='session')
        self.spec = deepcopy(spec)
        self.info_space = info_space
        self.coor, self.index = info_space.get_coor_idx(self)
        # 100 * trial (0 when the trial value is missing) + session index
        trial_value = info_space.get('trial') or 0
        self.random_seed = 100 * trial_value + self.index
        seed = self.random_seed
        torch.cuda.manual_seed_all(seed)
        torch.manual_seed(seed)
        np.random.seed(seed)
        self.data = None
        aeb_space = AEBSpace(self.spec, info_space)
        self.aeb_space = aeb_space
        self.env_space = EnvSpace(self.spec, aeb_space)
        self.agent_space = AgentSpace(self.spec, aeb_space)
        logger.info(util.self_desc(self))
        # both body-space calls run after the env and agent spaces are constructed
        aeb_space.init_body_space()
        aeb_space.post_body_init()
        logger.info(f'Initialized session {self.index}')

    def close(self):
        '''Close the agent space and then the env space, logging once both are closed.'''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done, closing.')

    def run_all_episodes(self):
        '''
        Reset env and agent spaces, then repeat act -> step -> update
        for as long as aeb_space.tick_clocks does not signal the session end.
        '''
        _, state_space, _ = self.env_space.reset()
        # return value of the agent reset at t=0 is intentionally unused
        self.agent_space.reset(state_space)
        while not self.aeb_space.tick_clocks(self):
            action_space = self.agent_space.act(state_space)
            reward_space, next_state_space, done_space = self.env_space.step(action_space)
            state_space = next_state_space
            self.agent_space.update(action_space, reward_space, state_space, done_space)

    def run(self):
        '''Run all episodes, record the analysis result in self.data, close, and return self.data.'''
        self.run_all_episodes()
        # session fitness
        result = analysis.analyze_session(self)
        self.data = result
        self.close()
        return self.data
예제 #6
0
class Session:
    '''
    The base unit of instantiated RL system.
    Given a spec,
    session creates agent(s) and environment(s),
    run the RL system and collect data, e.g. fitness metrics, till it ends,
    then return the session data.
    '''

    def __init__(self, spec, info_space=None):
        '''
        @param spec: experiment spec, forwarded to the AEB, env and agent spaces
        @param info_space: optional InfoSpace; when omitted (None) a fresh one is created per session
        '''
        self.spec = spec
        # A default of InfoSpace() in the signature would be evaluated once at
        # definition time and shared (and ticked) by every session created
        # without an explicit info_space, so the default is built here instead.
        if info_space is None:
            info_space = InfoSpace()
        if info_space.get('session') is None:
            info_space.tick('session')
        self.info_space = info_space
        self.coor, self.index = self.info_space.get_coor_idx(self)
        # TODO option to set rand_seed. also set np random seed
        self.torch_rand_seed = torch.initial_seed()
        self.data = None
        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.agent_space = AgentSpace(self.spec, self.aeb_space)
        logger.info(util.self_desc(self))
        # body space setup runs only after env and agent spaces are constructed
        self.aeb_space.init_body_space()
        self.aeb_space.post_body_init()

    def close(self):
        '''
        Close session and clean up:
        close the agent space, then the env space, and log completion.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done, closing.')

    def run_all_episodes(self):
        '''
        Reset the env and agent spaces, then loop act -> step -> update
        until aeb_space.tick_clocks signals the end of the session.
        '''
        _reward_space, state_space, _done_space = self.env_space.reset()
        self.agent_space.reset(state_space)
        while True:
            end_session = self.aeb_space.tick_clocks()
            if end_session:
                break
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(
                action_space)
            self.agent_space.update(action_space, reward_space, state_space,
                                    done_space)

    def run(self):
        '''Run all episodes, store the analysis result on self.data, close, and return self.data.'''
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
예제 #7
0
def test_prioritized_replay_memory(request):
    '''
    Build the spaces for the 'base_prioritized_replay_memory' spec and return
    a tuple made of the first agent's first body memory followed by request.param.
    '''
    spec = util.override_test_spec(
        spec_util.get('base.json', 'base_prioritized_replay_memory'))
    aeb_space = AEBSpace(spec, InfoSpace())
    # kept bound so the env space lives as long as the rest of the setup
    _env_space = EnvSpace(spec, aeb_space)
    # here the body space is initialized before the agent space is built
    aeb_space.init_body_space()
    agent_space = AgentSpace(spec, aeb_space)
    first_body = agent_space.agents[0].nanflat_body_a[0]
    return (first_body.memory, ) + request.param
예제 #8
0
 def __init__(self, spec):
     '''
     Store the spec, look up this session's coordinate and index,
     and build the AEB, env and agent spaces.
     @param spec: forwarded to the three space constructors
     '''
     self.spec = spec
     # info_space is not a parameter here; it is resolved from the enclosing scope
     self.coor, self.index = info_space.index_lab_comp(self)
     self.df = self.fitness_df = None
     aeb_space = AEBSpace(spec)
     self.aeb_space = aeb_space
     self.env_space = EnvSpace(spec, aeb_space)
     self.agent_space = AgentSpace(spec, aeb_space)
     logger.info(util.self_desc(self))
     # body space setup runs after env and agent spaces are constructed
     aeb_space.init_body_space()
     aeb_space.post_body_init()
예제 #9
0
def test_prioritized_replay_memory(request):
    '''
    Build the spaces for the 'base_prioritized_replay_memory' spec and return
    a tuple made of the first agent's first body memory followed by request.param.
    '''
    spec = util.override_test_spec(
        spec_util.get('base.json', 'base_prioritized_replay_memory'))
    aeb_space = AEBSpace(spec, InfoSpace())
    # kept bound so the env space lives as long as the rest of the setup
    _env_space = EnvSpace(spec, aeb_space)
    agent_space = AgentSpace(spec, aeb_space)
    # both body-space calls run after env and agent spaces are constructed
    aeb_space.init_body_space()
    aeb_space.post_body_init()
    first_body = agent_space.agents[0].nanflat_body_a[0]
    return (first_body.memory, ) + request.param
예제 #10
0
 def __init__(self, spec, info_space=None):
     '''
     Store the spec, resolve the session index, and build the AEB, env and agent spaces.
     @param spec: experiment spec, forwarded to the three space constructors
     @param info_space: optional InfoSpace; when omitted (None) a fresh one is created per call
     '''
     self.spec = spec
     # A default of InfoSpace() in the signature would be evaluated once at
     # definition time and shared (and ticked) by every instance created
     # without an explicit info_space, so the default is built here instead.
     if info_space is None:
         info_space = InfoSpace()
     if info_space.get('session') is None:
         info_space.tick('session')
     self.info_space = info_space
     self.coor, self.index = self.info_space.get_coor_idx(self)
     # TODO option to set rand_seed. also set np random seed
     self.torch_rand_seed = torch.initial_seed()
     self.data = None
     self.aeb_space = AEBSpace(self.spec, self.info_space)
     self.env_space = EnvSpace(self.spec, self.aeb_space)
     self.agent_space = AgentSpace(self.spec, self.aeb_space)
     logger.info(util.self_desc(self))
     # body space setup runs only after env and agent spaces are constructed
     self.aeb_space.init_body_space()
     self.aeb_space.post_body_init()
예제 #11
0
 def __init__(self, spec, info_space=None):
     '''
     Store the spec, resolve the session coordinate and index, and build the spaces.
     @param spec: experiment spec, forwarded to the three space constructors
     @param info_space: optional InfoSpace; a new one is created when a falsy value is given
     '''
     if not info_space:
         info_space = InfoSpace()
     init_thread_vars(spec, info_space, unit='session')
     self.spec = spec
     self.info_space = info_space
     self.coor, self.index = info_space.get_coor_idx(self)
     # TODO option to set rand_seed. also set np random seed
     self.torch_rand_seed = torch.initial_seed()
     self.data = None
     aeb_space = AEBSpace(spec, info_space)
     self.aeb_space = aeb_space
     self.env_space = EnvSpace(spec, aeb_space)
     self.agent_space = AgentSpace(spec, aeb_space)
     logger.info(util.self_desc(self))
     # body space setup runs after env and agent spaces are constructed
     aeb_space.init_body_space()
     aeb_space.post_body_init()
     logger.info(f'Initialized session {self.index}')
예제 #12
0
파일: control.py 프로젝트: vmuthuk2/SLM-Lab
    def __init__(self, spec, info_space, global_nets=None):
        '''
        Build the AEB, env and agent spaces for this session.
        @param spec: experiment spec, forwarded to every space constructor
        @param info_space: source of the session index and the random seed
        @param global_nets: forwarded unchanged to AgentSpace
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = info_space.get('session')
        util.set_logger(spec, info_space, logger, 'session')
        self.data = None

        aeb_space = AEBSpace(spec, info_space)
        self.aeb_space = aeb_space
        env_space = EnvSpace(spec, aeb_space)
        self.env_space = env_space
        aeb_space.init_body_space()
        # seeding and cuda selection run after the env space exists but before any agent is built
        util.set_rand_seed(info_space.get_random_seed(), env_space)
        util.try_set_cuda_id(spec, info_space)
        self.agent_space = AgentSpace(spec, aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')
예제 #13
0
 def __init__(self, spec, info_space=None):
     '''
     Copy the spec, seed torch and numpy, and build the AEB, env and agent spaces.
     @param spec: experiment spec; a deep copy is kept on the instance
     @param info_space: optional InfoSpace; a new one is created when a falsy value is given
     '''
     if not info_space:
         info_space = InfoSpace()
     init_thread_vars(spec, info_space, unit='session')
     self.spec = deepcopy(spec)
     self.info_space = info_space
     self.coor, self.index = info_space.get_coor_idx(self)
     # seed combines the trial value (0 when absent) with the session index
     trial_part = info_space.get('trial') or 0
     self.random_seed = 100 * trial_part + self.index
     seed = self.random_seed
     torch.cuda.manual_seed_all(seed)
     torch.manual_seed(seed)
     np.random.seed(seed)
     self.data = None
     aeb_space = AEBSpace(self.spec, info_space)
     self.aeb_space = aeb_space
     self.env_space = EnvSpace(self.spec, aeb_space)
     self.agent_space = AgentSpace(self.spec, aeb_space)
     logger.info(util.self_desc(self))
     # body space setup runs after env and agent spaces are constructed
     aeb_space.init_body_space()
     aeb_space.post_body_init()
     logger.info(f'Initialized session {self.index}')
예제 #14
0
    def __init__(self, spec, info_space, global_nets=None):
        '''
        Build the AEB, env and agent spaces for this session.
        @param spec: experiment spec, forwarded to every space constructor
        @param info_space: source of the session index and the random seed
        @param global_nets: forwarded to AgentSpace; must not be a single dict
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = info_space.get('session')
        util.set_session_logger(spec, info_space, logger)
        self.data = None

        aeb_space = AEBSpace(spec, info_space)
        self.aeb_space = aeb_space
        env_space = EnvSpace(spec, aeb_space)
        self.env_space = env_space
        aeb_space.init_body_space()
        # seeding and cuda selection run after the env space exists but before any agent is built
        util.set_rand_seed(info_space.get_random_seed(), env_space)
        util.try_set_cuda_id(spec, info_space)
        got_single_dict = ps.is_dict(global_nets)
        assert not got_single_dict, f'multi agent global_nets must be a list of dicts, got {global_nets}'
        self.agent_space = AgentSpace(spec, aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')
예제 #15
0
 def __init__(self, spec, info_space=None):
     '''
     Deep-copy the spec, derive and apply the random seed, then construct the spaces.
     @param spec: experiment spec; stored as a deep copy
     @param info_space: optional InfoSpace; replaced by a new one when falsy
     '''
     info_space = info_space if info_space else InfoSpace()
     init_thread_vars(spec, info_space, unit='session')
     self.spec = deepcopy(spec)
     self.info_space = info_space
     self.coor, self.index = info_space.get_coor_idx(self)
     # 100 * trial (0 when the trial value is missing) + session index
     trial_value = info_space.get('trial') or 0
     self.random_seed = 100 * trial_value + self.index
     seed = self.random_seed
     torch.cuda.manual_seed_all(seed)
     torch.manual_seed(seed)
     np.random.seed(seed)
     self.data = None
     aeb_space = AEBSpace(self.spec, info_space)
     self.aeb_space = aeb_space
     self.env_space = EnvSpace(self.spec, aeb_space)
     self.agent_space = AgentSpace(self.spec, aeb_space)
     logger.info(util.self_desc(self))
     # both body-space calls run after the env and agent spaces are constructed
     aeb_space.init_body_space()
     aeb_space.post_body_init()
     logger.info(f'Initialized session {self.index}')
예제 #16
0
def test_data_space(test_spec):
    '''Construct an AEBSpace from test_spec alone; the instance is only bound locally.'''
    space = AEBSpace(test_spec)
예제 #17
0
class SpaceSession(Session):
    '''Session variant that drives multiple agents and environments through shared spaces.'''

    def __init__(self, spec, info_space, global_nets=None):
        '''
        Build the AEB, env and agent spaces for this session.
        @param spec: experiment spec, forwarded to every space constructor
        @param info_space: source of the session index and the random seed
        @param global_nets: forwarded unchanged to AgentSpace
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = info_space.get('session')
        util.set_logger(spec, info_space, logger, 'session')
        self.data = None
        # reference run_online_eval process
        self.eval_proc = None

        aeb_space = AEBSpace(spec, info_space)
        self.aeb_space = aeb_space
        env_space = EnvSpace(spec, aeb_space)
        self.env_space = env_space
        aeb_space.init_body_space()
        # seeding and cuda selection run after the env space exists but before any agent is built
        util.set_rand_seed(info_space.get_random_seed(), env_space)
        util.try_set_cuda_id(spec, info_space)
        self.agent_space = AgentSpace(spec, aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def try_ckpt(self, agent_space, env_space):
        '''
        Call the parent class try_ckpt once per body of every agent,
        pairing the agent with that body's env. The env_space argument is not read.
        '''
        for agent in agent_space.agents:
            for body in agent.nanflat_body_a:
                super().try_ckpt(agent, body.env)

    def run_all_episodes(self):
        '''
        Reset the env and agent spaces, then loop checkpoint -> act -> step -> update
        until the AEB space tick reports that everything is done.
        A final checkpoint attempt follows the loop, and a pending eval process is waited on.
        '''
        finished = self.aeb_space.tick('epi')
        _, state_space, _ = self.env_space.reset()
        self.agent_space.reset(state_space)
        while not finished:
            self.try_ckpt(self.agent_space, self.env_space)
            finished = self.aeb_space.tick()
            action_space = self.agent_space.act(state_space)
            step_result = self.env_space.step(action_space)
            reward_space, state_space, done_space = step_result
            self.agent_space.update(
                action_space, reward_space, state_space, done_space)
        self.try_ckpt(self.agent_space, self.env_space)
        # wait for final eval before closing
        pending_eval = self.eval_proc
        if pending_eval is not None:
            util.run_cmd_wait(pending_eval)

    def close(self):
        '''Close the agent space, then the env space, and log that the session is closed.'''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done and closed.')

    def run(self):
        '''Run all episodes, store the analysis result on self.data, close, and return self.data.'''
        self.run_all_episodes()
        # session fitness
        session_data = analysis.analyze_session(self)
        self.data = session_data
        self.close()
        return self.data