Example #1
0
def _retro_analyze_session(session_spec_path):
    '''
    Retro-analyze a single session, given only the path to its spec.
    Reads the spec, then for each of the 'eval' and 'train' modes reads the session dataframe CSV
    found under the spec's meta.info_prepath and hands it to analysis.analyze_session.
    '''
    spec = util.read(session_spec_path)
    prepath = spec['meta']['info_prepath']
    df_modes = ('eval', 'train')
    for mode in df_modes:
        df_path = f'{prepath}_session_df_{mode}.csv'
        analysis.analyze_session(spec, util.read(df_path), mode)
Example #2
0
    def try_ckpt(self, agent, env):
        '''
        Checkpoint the agent when the env tick is a multiple of save_frequency (tick 0 included) or equals max_tick.
        Never checkpoints in the 'enjoy' or 'eval' lab modes; when ticks are counted in episodes the env must also be done.
        At a checkpoint: save the agent as 'best' if analysis.new_best(agent) is truthy,
        in 'train' mode with spec meta 'training_eval' set, save a named ckpt and launch the online eval on it,
        and analyze the session once tick > 0.
        '''
        clock = env.clock
        tick = clock.get(env.max_tick_unit)
        to_ckpt = False
        if util.get_lab_mode() not in ('enjoy', 'eval'):
            if tick <= env.max_tick:
                on_save_freq = tick % env.save_frequency == 0
                to_ckpt = on_save_freq or tick == env.max_tick
        if env.max_tick_unit == 'epi':  # an episode tick only counts once the episode is done
            to_ckpt = to_ckpt and env.done
        if not to_ckpt:
            return

        if analysis.new_best(agent):
            agent.save(ckpt='best')
        # online eval only applies to train mode with the training_eval flag on
        run_training_eval = util.get_lab_mode() == 'train' and self.spec['meta'].get('training_eval', False)
        if run_training_eval:
            ckpt_name = f'epi{clock.epi}-totalt{clock.total_t}'
            agent.save(ckpt=ckpt_name)
            # keep a reference to the eval process for later handling
            self.eval_proc = analysis.run_online_eval(self.spec, self.info_space, ckpt_name)
        if tick > 0:  # there is nothing to analyze at the very start
            analysis.analyze_session(self)
Example #3
0
    def try_ckpt(self, agent, env):
        '''
        Run the log checkpoint and/or the eval checkpoint when self.to_ckpt says they are due.
        Log ckpt: update the body's train ckpt and log the train summary.
        Eval ckpt: generate the average return on self.eval_env, update the body's eval ckpt, log the eval summary,
        save the agent as 'best' when the eval reward moving average is at least the best so far,
        then analyze and log metrics for the train and eval dataframes that have more than 1 row.
        '''
        body = agent.body

        def analyze_and_log(df, df_mode):
            '''Analyze df without plotting and log its scalar metrics; needs > 1 row to calculate stability'''
            if len(df) > 1:
                metrics = analysis.analyze_session(self.spec, df, df_mode, plot=False)
                body.log_metrics(metrics['scalar'], df_mode)

        if self.to_ckpt(env, 'log'):
            body.train_ckpt()
            body.log_summary('train')

        if not self.to_ckpt(env, 'eval'):
            return

        logger.info('Running eval ckpt')
        avg_return = analysis.gen_avg_return(agent, self.eval_env)
        body.eval_ckpt(self.eval_env, avg_return)
        body.log_summary('eval')
        is_new_best = body.eval_reward_ma >= body.best_reward_ma
        if is_new_best:
            body.best_reward_ma = body.eval_reward_ma
            agent.save(ckpt='best')
        analyze_and_log(body.train_df, 'train')
        analyze_and_log(body.eval_df, 'eval')
Example #4
0
    def try_ckpt(self, agent, env):
        '''
        Run the log checkpoint and/or the eval checkpoint when self.to_ckpt says they are due.
        Log ckpt: checkpoint the body on self.env in 'train' mode, log the train summary and the train metrics.
        Eval ckpt: optionally generate the average return (spec meta.rigorous_eval), checkpoint the body on
        self.eval_env in 'eval' mode, log the eval summary, save the agent as 'best' when the total reward
        moving average is at least the best so far, then log the eval metrics.
        Metrics are only computed for dataframes with more than 2 rows.
        '''
        body = agent.body

        def analyze_and_log(df, df_mode):
            '''Analyze df without plotting and log its scalar metrics; needs more rows to calculate metrics'''
            if len(df) > 2:
                metrics = analysis.analyze_session(self.spec, df, df_mode, plot=False)
                body.log_metrics(metrics['scalar'], df_mode)

        if self.to_ckpt(env, 'log'):
            body.ckpt(self.env, 'train')
            body.log_summary('train')
            analyze_and_log(body.train_df, 'train')

        if not self.to_ckpt(env, 'eval'):
            return

        logger.info('Running eval ckpt')
        rigorous_eval = ps.get(self.spec, 'meta.rigorous_eval')
        if rigorous_eval:
            analysis.gen_avg_return(agent, self.eval_env)
        body.ckpt(self.eval_env, 'eval')
        body.log_summary('eval')
        is_new_best = body.total_reward_ma >= body.best_total_reward_ma
        if is_new_best:
            body.best_total_reward_ma = body.total_reward_ma
            agent.save(ckpt='best')
        analyze_and_log(body.eval_df, 'eval')
Example #5
0
 def save_if_ckpt(self, agent, env):
     '''
     Save the agent as 'last' and analyze the session when the current episode is a checkpoint.
     An episode is a checkpoint when the env is done, the episode count is positive and not max_episode,
     and the env has a save_epi_frequency that evenly divides the episode count.
     '''
     epi = env.clock.get('epi')
     if not env.done:
         return
     if epi == env.max_episode or epi <= 0:
         return
     if not hasattr(env, 'save_epi_frequency'):
         return
     if epi % env.save_epi_frequency != 0:
         return
     agent.save(ckpt='last')
     analysis.analyze_session(self)
Example #6
0
 def save_if_ckpt(self, agent, env):
     '''
     Save the agent and analyze the session when the current tick is a checkpoint.
     A tick is a checkpoint when the env has a save_frequency, 0 < tick < max_tick, the tick is a multiple
     of save_frequency and, when ticks are counted in episodes, the env is done.
     At a checkpoint the agent is saved as 'last', and also as 'best' if analysis.new_best(agent) is truthy.
     '''
     tick = env.clock.get(env.max_tick_unit)
     in_save_range = hasattr(env, 'save_frequency') and 0 < tick < env.max_tick
     if not in_save_range:
         return
     # for episode ticks, the episode must be done before the frequency is checked
     if env.max_tick_unit == 'epi' and not env.done:
         return
     if tick % env.save_frequency != 0:
         return
     agent.save(ckpt='last')
     if analysis.new_best(agent):
         agent.save(ckpt='best')
     analysis.analyze_session(self)
Example #7
0
 def run(self, time_limit):
     '''
     Run episodes until time_limit seconds have elapsed, then analyze and close the session.
     The limit is only checked between episodes, so an episode that is already running is never interrupted.
     @param time_limit: time budget in seconds for running episodes
     @returns: tuple of (session analysis data, agent)
     '''
     # use the monotonic clock so that system clock adjustments cannot shorten or extend the budget
     deadline = time.monotonic() + time_limit
     while time.monotonic() < deadline:
         self.run_episode()
     self.data = analysis.analyze_session(self)  # session fitness
     self.close()
     return self.data, self.agent
Example #8
0
 def run(self):
     '''Run the RL loop, analyze the agent body's eval dataframe, log its scalar metrics, close the session and return the metrics'''
     self.run_rl()
     eval_df = self.agent.body.eval_df
     session_metrics = analysis.analyze_session(self.spec, eval_df, 'eval')
     scalar_metrics = session_metrics['scalar']
     self.agent.body.log_metrics(scalar_metrics, 'eval')
     self.close()
     return session_metrics
Example #9
0
 def run(self):
     '''
     Run episodes until the env clock reaches max_tick (in the env's max_tick_unit),
     wait for any parallel eval via retro_analysis, then analyze the session, close it and return the analysis data
     '''
     def has_ticks_left():
         '''Whether the env clock is still below max_tick'''
         env = self.env
         return env.clock.get(env.max_tick_unit) < env.max_tick

     while has_ticks_left():
         self.run_episode()
     retro_analysis.try_wait_parallel_eval(self)
     session_data = analysis.analyze_session(self)  # session fitness
     self.data = session_data
     self.close()
     return self.data
Example #10
0
    def try_ckpt(self, agent, env):
        '''
        Checkpoint the agent when the env tick is a multiple of eval_frequency (tick 0 included) or equals max_tick.
        Never checkpoints in eval lab modes; when ticks are counted in episodes the env must also be done.
        At a checkpoint: run the eval (in parallel if spec meta 'parallel_eval' is set, otherwise as an eval episode),
        save the agent as 'best' if analysis.new_best(agent) is truthy, and analyze the session once tick > 0.
        '''
        tick = env.clock.get(env.max_tick_unit)
        to_ckpt = False
        if not util.in_eval_lab_modes():
            if tick <= env.max_tick:
                on_eval_freq = tick % env.eval_frequency == 0
                to_ckpt = on_eval_freq or tick == env.max_tick
        if env.max_tick_unit == 'epi':  # an episode tick only counts once the episode is done
            to_ckpt = to_ckpt and env.done
        if not to_ckpt:
            return

        use_parallel_eval = self.spec['meta'].get('parallel_eval')
        if use_parallel_eval:
            retro_analysis.run_parallel_eval(self, agent, env)
        else:
            self.run_eval_episode()
        if analysis.new_best(agent):
            agent.save(ckpt='best')
        if tick > 0:  # there is nothing to analyze at the very start
            analysis.analyze_session(self, eager_analyze_trial=True)
Example #11
0
 def run(self):
     '''
     Run episodes until the env clock reaches max_tick (in the env's max_tick_unit).
     Outside the 'enjoy' and 'eval' lab modes, exit early once analysis.all_solved(self.agent) is truthy.
     Then analyze the session, close it and return the analysis data.
     '''
     while True:
         if not self.env.clock.get(self.env.max_tick_unit) < self.env.max_tick:
             break
         self.run_episode()
         can_exit_early = util.get_lab_mode() not in ('enjoy', 'eval')
         if can_exit_early and analysis.all_solved(self.agent):
             logger.info('All environments solved. Early exit.')
             break
     session_data = analysis.analyze_session(self)  # session fitness
     self.data = session_data
     self.close()
     return self.data
Example #12
0
 def run(self):
     '''
     Run episodes until the env clock reaches max_tick (in the env's max_tick_unit).
     Outside the 'enjoy' and 'eval' lab modes, exit early once analysis.all_solved(self.agent) is truthy.
     Wait for the pending eval process, if any, then analyze the session, close it and return the analysis data.
     '''
     def has_ticks_left():
         '''Whether the env clock is still below max_tick'''
         env = self.env
         return env.clock.get(env.max_tick_unit) < env.max_tick

     while has_ticks_left():
         self.run_episode()
         can_exit_early = util.get_lab_mode() not in ('enjoy', 'eval')
         if can_exit_early and analysis.all_solved(self.agent):
             logger.info('All environments solved. Early exit.')
             break
     pending_eval = self.eval_proc
     if pending_eval is not None:  # the final eval must finish before closing
         util.run_cmd_wait(pending_eval)
     session_data = analysis.analyze_session(self)  # session fitness
     self.data = session_data
     self.close()
     return self.data
Example #13
0
 def run(self):
     '''Run all episodes, store the session analysis on self.data, close the session and return self.data'''
     self.run_all_episodes()
     session_data = analysis.analyze_session(self)  # session fitness
     self.data = session_data
     self.close()
     return self.data
Example #14
0
 def run(self):
     '''
     Session entry point: run all episodes, then analyze the session.
     The analysis result is kept on self.data and returned after the session is closed.
     '''
     self.run_all_episodes()
     analyzed = analysis.analyze_session(self)  # session fitness
     self.data = analyzed
     self.close()
     return self.data
Example #15
0
 def run(self):
     '''
     Run all episodes, then analyze the session into a pair of results stored on self.df and self.fitness_df.
     Closes the session and returns the pair (self.df, self.fitness_df).
     '''
     self.run_all_episodes()
     session_df, session_fitness_df = analysis.analyze_session(self)
     self.df = session_df
     self.fitness_df = session_fitness_df
     self.close()
     return self.df, self.fitness_df
Example #16
0
 def run(self):
     '''
     Run all episodes, then analyze the session with tmp_space_session_sub enabled.
     The analysis result is kept on self.data and returned after the session is closed.
     '''
     self.run_all_episodes()
     session_data = analysis.analyze_session(self, tmp_space_session_sub=True)  # session fitness
     self.data = session_data
     self.close()
     return self.data
Example #17
0
 def run(self):
     '''
     Run episodes until the env clock's 'epi' count reaches max_episode,
     then analyze the session, close it and return the analysis data stored on self.data
     '''
     while True:
         if not self.env.clock.get('epi') < self.env.max_episode:
             break
         self.run_episode()
     session_data = analysis.analyze_session(self)  # session fitness
     self.data = session_data
     self.close()
     return self.data