Beispiel #1
0
class Session:
    '''
    Time-boxed RL session: builds one environment and one agent from a spec,
    then keeps running whole episodes until a time budget is used up.
    '''
    def __init__(self, spec, info_space):
        '''
        Build the environment, the agent body and the agent for this session.

        spec: experiment spec; passed to OpenAIEnv and Agent, and spec['agent'] to Body
        info_space: object whose get('session') yields this session's index
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        self.data = None  # set by run(); defined up front so it can be read before run()

        # init singleton agent and env
        self.env = OpenAIEnv(self.spec)
        body = Body(self.env, self.spec['agent'])
        self.agent = Agent(self.spec, self.info_space, body=body)

        enable_aeb_space(self)  # to use lab's data analysis framework
        logger.info(f'Initialized session {self.index}')

    def run_episode(self):
        '''
        Play one episode to completion.

        Ticks the env clock for the episode, resets env and agent, then alternates
        agent.act / env.step / agent.update until the env reports done,
        and finally logs the body summary.
        '''
        self.env.clock.tick('epi')
        # the reward returned by reset() is not used; only state and done drive the loop
        reward, state, done = self.env.reset()
        self.agent.reset(state)
        while not done:
            self.env.clock.tick('t')
            action = self.agent.act(state)
            reward, state, done = self.env.step(action)
            self.agent.update(action, reward, state, done)
        self.agent.body.log_summary()

    def close(self):
        '''Close the agent first, then the environment, and log that the session ended.'''
        self.agent.close()
        self.env.close()
        logger.info('Session done and closed.')

    def run(self, time_limit):
        '''
        Run episodes until time_limit seconds have elapsed, then analyze and close.

        The budget is only checked between episodes, so an episode that has started
        always runs to completion. With a non-positive time_limit no episode is run.

        time_limit: time budget in seconds
        Returns a tuple (session data from analysis.analyze_session, agent).
        '''
        # use the monotonic clock: unlike time.time() it cannot jump when the
        # system clock is adjusted, so the budget is measured reliably
        deadline = time.monotonic() + time_limit
        while time.monotonic() < deadline:
            self.run_episode()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data, self.agent

    def update_env(self, env):
        '''
        Swap the underlying environment object.

        Sets u_env both on this session's env and on the env referenced by the agent body.
        '''
        self.env.u_env = env
        self.agent.body.env.u_env = env
Beispiel #2
0
class Session:
    '''
    One self-contained RL run: a single agent acting in a single environment.
    Built from a spec, it drives the agent/env loop until the env clock reaches its limit
    (or, outside the enjoy/eval lab modes, until every environment is solved),
    checkpoints along the way, and hands back the analyzed session data.
    '''
    def __init__(self, spec, info_space, global_nets=None):
        '''
        Wire up logging, seeding, the environment and the agent.

        spec: experiment spec used to build env and agent
        info_space: provides the session index and the random seed
        global_nets: forwarded to Agent; must not be a list for a single agent
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_session_logger(self.spec, self.info_space, logger)
        self.data = None

        # exactly one env and one agent live in a session
        self.env = make_env(self.spec)
        agent_body = Body(self.env, self.spec['agent'])
        util.set_rand_seed(self.info_space.get_random_seed(), self.env)
        util.try_set_cuda_id(self.spec, self.info_space)
        assert not ps.is_list(global_nets), f'single agent global_nets must be a dict, got {global_nets}'
        self.agent = Agent(
            self.spec,
            self.info_space,
            body=agent_body,
            global_nets=global_nets,
        )

        enable_aeb_space(self)  # plugs the session into the lab's data analysis framework
        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def save_if_ckpt(self, agent, env):
        '''
        Checkpoint the agent when the env clock sits on a save point.

        A save point needs env.save_frequency to exist, the tick to be strictly between
        0 and env.max_tick, and the tick to be a multiple of env.save_frequency;
        when ticks are counted in episodes the env must also be done.
        On a save point the agent is saved as 'last', additionally as 'best' if
        analysis.new_best says so, and the session is analyzed.
        '''
        unit = env.max_tick_unit
        now = env.clock.get(unit)
        if not hasattr(env, 'save_frequency'):
            return
        if not 0 < now < env.max_tick:
            return
        if unit == 'epi' and not env.done:
            return  # episode-based checkpoints only fire once the episode is over
        if not now % env.save_frequency == 0:
            return

        agent.save(ckpt='last')
        if analysis.new_best(agent):
            agent.save(ckpt='best')
        analysis.analyze_session(self)

    def run_episode(self):
        '''
        Play one episode: reset env and agent, then act/step/update until done,
        checking for a checkpoint after every step, and log the body summary at the end.
        '''
        self.env.clock.tick('epi')
        banner = f'Running trial {self.info_space.get("trial")} session {self.index} episode {self.env.clock.get("epi")}'
        logger.info(banner)
        _, obs, finished = self.env.reset()
        self.agent.reset(obs)
        while not finished:
            self.env.clock.tick('t')
            chosen = self.agent.act(obs)
            gain, obs, finished = self.env.step(chosen)
            self.agent.update(chosen, gain, obs, finished)
            self.save_if_ckpt(self.agent, self.env)
        self.agent.body.log_summary()

    def close(self):
        '''
        Tear the session down: close the agent, then the env, then log it.
        (Per the original note, closing the agent is expected to save it.)
        '''
        self.agent.close()
        self.env.close()
        logger.info('Session done and closed.')

    def run(self):
        '''
        Run episodes until the env clock reaches env.max_tick, with an early exit
        when all environments are solved outside the enjoy/eval lab modes.
        Afterwards analyze the session, close it, and return the session data.
        '''
        while self.env.clock.get(self.env.max_tick_unit) < self.env.max_tick:
            self.run_episode()
            is_training = util.get_lab_mode() not in ('enjoy', 'eval')
            if is_training and analysis.all_solved(self.agent):
                logger.info('All environments solved. Early exit.')
                break
        session_data = analysis.analyze_session(self)  # session fitness
        self.data = session_data
        self.close()
        return self.data
Beispiel #3
0
class Session:
    '''
    One self-contained RL run: a single agent trained in one environment and
    periodically evaluated in a second, separately built environment.
    Built from a spec, it runs episodes until the training env clock reaches its limit,
    then returns the analyzed session data.
    '''
    def __init__(self, spec, info_space, global_nets=None):
        '''
        Wire up logging, the training env, the eval env, seeding and the agent.

        spec: experiment spec used to build envs and agent
        info_space: provides the session index and the random seed
        global_nets: forwarded to Agent
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_logger(self.spec, self.info_space, logger, 'session')
        self.data = None

        # training env, seeded right after creation
        self.env = make_env(self.spec)
        util.set_rand_seed(self.info_space.get_random_seed(), self.env)
        # evaluation env is built and seeded while the lab mode is switched to 'eval'
        with util.ctx_lab_mode('eval'):
            self.eval_env = make_env(self.spec)
            util.set_rand_seed(self.info_space.get_random_seed(), self.eval_env)
        util.try_set_cuda_id(self.spec, self.info_space)
        agent_body = Body(self.env, self.spec['agent'])
        self.agent = Agent(
            self.spec,
            self.info_space,
            body=agent_body,
            global_nets=global_nets,
        )

        enable_aeb_space(self)  # plugs the session into the lab's data analysis framework
        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def try_ckpt(self, agent, env):
        '''
        Evaluate and checkpoint the agent if the env clock sits on a checkpoint.

        Outside the eval lab modes, a checkpoint is due when the tick does not exceed
        env.max_tick and is either a multiple of env.eval_frequency or equal to env.max_tick;
        when ticks are counted in episodes the env must also be done.
        '''
        unit = env.max_tick_unit
        now = env.clock.get(unit)
        due = False
        if not util.in_eval_lab_modes():
            if now <= env.max_tick:
                due = now % env.eval_frequency == 0 or now == env.max_tick
        if unit == 'epi' and due:
            due = env.done  # episode-based checkpoints only fire once the episode is over
        if not due:
            return

        if self.spec['meta'].get('parallel_eval'):
            retro_analysis.run_parallel_eval(self, agent, env)
        else:
            self.run_eval_episode()
        if analysis.new_best(agent):
            agent.save(ckpt='best')
        if now > 0:  # tick 0 has produced no data to analyze yet
            analysis.analyze_session(self, eager_analyze_trial=True)

    def run_eval_episode(self):
        '''
        Play one episode on the eval env under the 'eval' lab mode, sum up its rewards,
        and record the total in the body's eval data.
        '''
        with util.ctx_lab_mode('eval'):
            # per the original note: updating inside the eval context sets explore_var etc. to end_val
            self.agent.algorithm.update()
            self.eval_env.clock.tick('epi')
            banner = f'Running eval episode for trial {self.info_space.get("trial")} session {self.index}'
            logger.info(banner)
            total_reward = 0
            _, obs, finished = self.eval_env.reset()
            while not finished:
                self.eval_env.clock.tick('t')
                chosen = self.agent.act(obs)
                gain, obs, finished = self.eval_env.step(chosen)
                total_reward += gain
        # back outside the eval context: another update restores the variables
        self.agent.algorithm.update()
        # push the episode result into body.eval_df and log it
        self.agent.body.eval_update(self.eval_env, total_reward)
        self.agent.body.log_summary(body_df_kind='eval')

    def run_episode(self):
        '''
        Play one training episode: reset env and agent, then act/step/update until done.
        A checkpoint attempt is made before every step and once more after the last one.
        '''
        self.env.clock.tick('epi')
        banner = f'Running trial {self.info_space.get("trial")} session {self.index} episode {self.env.clock.epi}'
        logger.info(banner)
        _, obs, finished = self.env.reset()
        self.agent.reset(obs)
        while not finished:
            self.try_ckpt(self.agent, self.env)
            self.env.clock.tick('t')
            chosen = self.agent.act(obs)
            gain, obs, finished = self.env.step(chosen)
            self.agent.update(chosen, gain, obs, finished)
        self.try_ckpt(self.agent, self.env)  # covers the final timestep
        self.agent.body.log_summary(body_df_kind='train')

    def close(self):
        '''
        Tear the session down: close the agent, the training env and the eval env
        in that order, then log it.
        (Per the original note, closing the agent is expected to save it.)
        '''
        for part_name in ('agent', 'env', 'eval_env'):
            getattr(self, part_name).close()
        logger.info('Session done and closed.')

    def run(self):
        '''
        Run training episodes until the env clock reaches env.max_tick,
        wait for any parallel evaluation, analyze the session, close it,
        and return the session data.
        '''
        while self.env.clock.get(self.env.max_tick_unit) < self.env.max_tick:
            self.run_episode()
        retro_analysis.try_wait_parallel_eval(self)
        session_data = analysis.analyze_session(self)  # session fitness
        self.data = session_data
        self.close()
        return self.data
Beispiel #4
0
class Session:
    '''
    One self-contained RL run: a single agent acting in a single environment.
    Built from a spec, it runs episodes until the env clock reaches its limit
    (or, outside the enjoy/eval lab modes, until every environment is solved),
    optionally launching online evaluations at checkpoints,
    and returns the analyzed session data.
    '''
    def __init__(self, spec, info_space, global_nets=None):
        '''
        Wire up logging, seeding, the environment and the agent.

        spec: experiment spec used to build env and agent
        info_space: provides the session index and the random seed
        global_nets: forwarded to Agent
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_logger(self.spec, self.info_space, logger, 'session')
        self.data = None
        self.eval_proc = None  # handle of the most recent run_online_eval process

        # exactly one env and one agent live in a session
        self.env = make_env(self.spec)
        agent_body = Body(self.env, self.spec['agent'])
        util.set_rand_seed(self.info_space.get_random_seed(), self.env)
        util.try_set_cuda_id(self.spec, self.info_space)
        self.agent = Agent(
            self.spec,
            self.info_space,
            body=agent_body,
            global_nets=global_nets,
        )

        enable_aeb_space(self)  # plugs the session into the lab's data analysis framework
        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def try_ckpt(self, agent, env):
        '''
        Checkpoint the agent, and possibly start an online eval, if the env clock sits on a checkpoint.

        Outside the enjoy/eval lab modes, a checkpoint is due when the tick does not exceed
        env.max_tick and is either a multiple of env.save_frequency or equal to env.max_tick;
        when ticks are counted in episodes the env must also be done.
        '''
        clock = env.clock
        unit = env.max_tick_unit
        now = clock.get(unit)
        due = False
        if util.get_lab_mode() not in ('enjoy', 'eval'):
            if now <= env.max_tick:
                due = now % env.save_frequency == 0 or now == env.max_tick
        if unit == 'epi' and due:
            due = env.done  # episode-based checkpoints only fire once the episode is over
        if not due:
            return

        if analysis.new_best(agent):
            agent.save(ckpt='best')
        # in train mode with meta.training_eval enabled, save a named checkpoint and evaluate it online
        wants_online_eval = util.get_lab_mode() == 'train' and self.spec['meta'].get('training_eval', False)
        if wants_online_eval:
            ckpt = f'epi{clock.epi}-totalt{clock.total_t}'
            agent.save(ckpt=ckpt)
            # remember the process so run() can wait for it
            self.eval_proc = analysis.run_online_eval(self.spec, self.info_space, ckpt)
        if now > 0:  # tick 0 has produced no data to analyze yet
            analysis.analyze_session(self)

    def run_episode(self):
        '''
        Play one episode: reset env and agent, then act/step/update until done.
        A checkpoint attempt is made before every step and once more after the last one.
        '''
        self.env.clock.tick('epi')
        banner = f'Running trial {self.info_space.get("trial")} session {self.index} episode {self.env.clock.epi}'
        logger.info(banner)
        _, obs, finished = self.env.reset()
        self.agent.reset(obs)
        while not finished:
            self.try_ckpt(self.agent, self.env)
            self.env.clock.tick('t')
            chosen = self.agent.act(obs)
            gain, obs, finished = self.env.step(chosen)
            self.agent.update(chosen, gain, obs, finished)
        self.try_ckpt(self.agent, self.env)  # covers the final timestep
        self.agent.body.log_summary()

    def close(self):
        '''
        Tear the session down: close the agent, then the env, then log it.
        (Per the original note, closing the agent is expected to save it.)
        '''
        for part_name in ('agent', 'env'):
            getattr(self, part_name).close()
        logger.info('Session done and closed.')

    def run(self):
        '''
        Run episodes until the env clock reaches env.max_tick, with an early exit
        when all environments are solved outside the enjoy/eval lab modes.
        Then wait for the last online eval process (if any), analyze the session,
        close it, and return the session data.
        '''
        while self.env.clock.get(self.env.max_tick_unit) < self.env.max_tick:
            self.run_episode()
            is_training = util.get_lab_mode() not in ('enjoy', 'eval')
            if is_training and analysis.all_solved(self.agent):
                logger.info('All environments solved. Early exit.')
                break
        pending_eval = self.eval_proc
        if pending_eval is not None:  # let the final eval finish before closing
            util.run_cmd_wait(pending_eval)
        session_data = analysis.analyze_session(self)  # session fitness
        self.data = session_data
        self.close()
        return self.data
Beispiel #5
0
class Session:
    '''
    One self-contained RL run: a single agent acting in a single environment.
    Built from a spec, it runs episodes until the episode counter reaches env.max_episode,
    saving the agent at regular episode intervals,
    and returns the analyzed session data.
    '''
    def __init__(self, spec, info_space, global_nets=None):
        '''
        Wire up logging, seeding, the environment and the agent.

        spec: experiment spec used to build env and agent
        info_space: provides the session index and the random seed
        global_nets: forwarded to Agent
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_session_logger(self.spec, self.info_space, logger)
        util.set_module_seed(self.info_space.get_random_seed())
        util.try_set_cuda_id(self.spec, self.info_space)
        self.data = None

        # exactly one env and one agent live in a session
        self.env = make_env(self.spec)
        agent_body = Body(self.env, self.spec['agent'])
        self.agent = Agent(
            self.spec,
            self.info_space,
            body=agent_body,
            global_nets=global_nets,
        )

        enable_aeb_space(self)  # plugs the session into the lab's data analysis framework
        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def save_if_ckpt(self, agent, env):
        '''
        Save the agent and analyze the session when a finished episode is a save point.

        A save point requires the env to be done, a positive episode count different from
        env.max_episode, env.save_epi_frequency to exist, and the episode count to be
        a multiple of that frequency.
        '''
        episode = env.clock.get('epi')
        if not env.done:
            return
        if episode == env.max_episode:
            return  # the very last episode is not checkpointed here
        if episode <= 0:
            return
        if not hasattr(env, 'save_epi_frequency'):
            return
        if not episode % env.save_epi_frequency == 0:
            return

        agent.save(ckpt='last')
        analysis.analyze_session(self)

    def run_episode(self):
        '''
        Play one episode: reset env and agent, then act/step/update until done,
        log the body summary, and finally check whether to checkpoint.
        '''
        self.env.clock.tick('epi')
        banner = f'Running trial {self.info_space.get("trial")} session {self.index} episode {self.env.clock.get("epi")}'
        logger.info(banner)
        _, obs, finished = self.env.reset()
        self.agent.reset(obs)
        while not finished:
            self.env.clock.tick('t')
            chosen = self.agent.act(obs)
            gain, obs, finished = self.env.step(chosen)
            self.agent.update(chosen, gain, obs, finished)
        self.agent.body.log_summary()
        self.save_if_ckpt(self.agent, self.env)

    def close(self):
        '''
        Tear the session down: close the agent, then the env, then log it.
        (Per the original note, closing the agent is expected to save it.)
        '''
        for part_name in ('agent', 'env'):
            getattr(self, part_name).close()
        logger.info('Session done and closed.')

    def run(self):
        '''
        Run episodes until the episode counter reaches env.max_episode,
        then analyze the session, close it, and return the session data.
        '''
        while self.env.clock.get('epi') < self.env.max_episode:
            self.run_episode()
        session_data = analysis.analyze_session(self)  # session fitness
        self.data = session_data
        self.close()
        return self.data