def _retro_analyze_session(session_spec_path):
    '''
    Retro-analyze a single session given only the path to its spec.

    Reads the spec from session_spec_path, then for the 'eval' and 'train' modes
    (in that order) reads the matching session dataframe CSV located via
    spec['meta']['info_prepath'] and passes it to analysis.analyze_session.
    '''
    spec = util.read(session_spec_path)
    info_prepath = spec['meta']['info_prepath']
    for df_mode in ('eval', 'train'):
        # one CSV per mode, named after the session's info prepath
        df_path = f'{info_prepath}_session_df_{df_mode}.csv'
        analysis.analyze_session(spec, util.read(df_path), df_mode)
def try_ckpt(self, agent, env):
    '''
    Try to checkpoint agent and run_online_eval at the start, save_freq, and the end.

    A checkpoint is due when the lab mode is neither 'enjoy' nor 'eval', the
    current tick has not passed env.max_tick, and the tick is a multiple of
    env.save_frequency or equals env.max_tick. When ticks are counted in
    episodes ('epi'), env.done must also be set.
    '''
    clock = env.clock
    tick = clock.get(env.max_tick_unit)

    if util.get_lab_mode() not in ('enjoy', 'eval') and tick <= env.max_tick:
        is_due = (tick % env.save_frequency == 0) or tick == env.max_tick
    else:
        is_due = False
    if env.max_tick_unit == 'epi':
        # extra condition for epi
        is_due = is_due and env.done
    if not is_due:
        return

    if analysis.new_best(agent):
        agent.save(ckpt='best')

    # run online eval for train mode
    wants_online_eval = (
        util.get_lab_mode() == 'train'
        and self.spec['meta'].get('training_eval', False)
    )
    if wants_online_eval:
        ckpt = f'epi{clock.epi}-totalt{clock.total_t}'
        agent.save(ckpt=ckpt)
        # set reference to eval process for handling
        self.eval_proc = analysis.run_online_eval(self.spec, self.info_space, ckpt)

    if tick > 0:
        # nothing to analyze at start
        analysis.analyze_session(self)
def try_ckpt(self, agent, env):
    '''
    Check then run checkpoint log/eval.

    When the 'log' checkpoint is due, the body runs its train checkpoint and
    logs the 'train' summary. When the 'eval' checkpoint is due, an average
    return is generated on self.eval_env, the body runs its eval checkpoint,
    the agent is saved as 'best' if the eval reward moving average is at least
    the best so far, and session metrics are logged for both dataframes.
    '''
    # NOTE(review): the metric analysis is assumed to belong to the 'eval'
    # checkpoint branch -- confirm against the intended nesting.
    body = agent.body

    def log_session_metrics(session_df, df_mode):
        '''Analyze one session dataframe and log its scalar metrics.'''
        # need > 1 row to calculate stability
        if len(session_df) > 1:
            metrics = analysis.analyze_session(self.spec, session_df, df_mode, plot=False)
            body.log_metrics(metrics['scalar'], df_mode)

    if self.to_ckpt(env, 'log'):
        body.train_ckpt()
        body.log_summary('train')

    if not self.to_ckpt(env, 'eval'):
        return

    logger.info('Running eval ckpt')
    avg_return = analysis.gen_avg_return(agent, self.eval_env)
    body.eval_ckpt(self.eval_env, avg_return)
    body.log_summary('eval')

    if body.eval_reward_ma >= body.best_reward_ma:
        body.best_reward_ma = body.eval_reward_ma
        agent.save(ckpt='best')

    log_session_metrics(body.train_df, 'train')
    log_session_metrics(body.eval_df, 'eval')
def try_ckpt(self, agent, env):
    '''
    Check then run checkpoint log/eval.

    'log' checkpoint: checkpoint the body on self.env in 'train' mode, log the
    'train' summary, then log train session metrics.
    'eval' checkpoint: optionally generate an average return on self.eval_env
    (when spec 'meta.rigorous_eval' is set), checkpoint the body on
    self.eval_env in 'eval' mode, log the 'eval' summary, save the agent as
    'best' if the total reward moving average is at least the best so far,
    then log eval session metrics.
    '''
    # NOTE(review): only gen_avg_return is assumed to be gated by
    # 'meta.rigorous_eval' -- confirm against the intended nesting.
    body = agent.body

    def log_session_metrics(session_df, df_mode):
        '''Analyze one session dataframe and log its scalar metrics.'''
        # need more rows to calculate metrics
        if len(session_df) > 2:
            metrics = analysis.analyze_session(self.spec, session_df, df_mode, plot=False)
            body.log_metrics(metrics['scalar'], df_mode)

    if self.to_ckpt(env, 'log'):
        # uses self.env here, not the env argument
        body.ckpt(self.env, 'train')
        body.log_summary('train')
        log_session_metrics(body.train_df, 'train')

    if not self.to_ckpt(env, 'eval'):
        return

    logger.info('Running eval ckpt')
    if ps.get(self.spec, 'meta.rigorous_eval'):
        analysis.gen_avg_return(agent, self.eval_env)
    body.ckpt(self.eval_env, 'eval')
    body.log_summary('eval')

    if body.total_reward_ma >= body.best_total_reward_ma:
        body.best_total_reward_ma = body.total_reward_ma
        agent.save(ckpt='best')

    log_session_metrics(body.eval_df, 'eval')
def save_if_ckpt(self, agent, env):
    '''
    Save for agent, env if episode is at checkpoint.

    An episode is a checkpoint when env.done is set, the episode count is
    positive and not equal to env.max_episode, env has a save_epi_frequency
    attribute, and the episode count is a multiple of it. At a checkpoint the
    agent is saved as 'last' and the session is analyzed.
    '''
    epi = env.clock.get('epi')

    # guards are checked in order; the first failing one skips the save
    if not env.done:
        return
    if not (epi != env.max_episode and epi > 0):
        return
    if not hasattr(env, 'save_epi_frequency'):
        return
    if epi % env.save_epi_frequency != 0:
        return

    agent.save(ckpt='last')
    analysis.analyze_session(self)
def save_if_ckpt(self, agent, env):
    '''
    Save for agent, env if episode is at checkpoint.

    Saving applies only when env has a save_frequency attribute and the tick
    is strictly between 0 and env.max_tick. The tick must be a multiple of
    env.save_frequency; when ticks are counted in episodes ('epi'), env.done
    must also be set. On save, the agent is stored as 'last', additionally as
    'best' if analysis.new_best reports so, and the session is analyzed.
    '''
    tick = env.clock.get(env.max_tick_unit)

    in_save_window = hasattr(env, 'save_frequency') and 0 < tick < env.max_tick
    if not in_save_window:
        return
    if env.max_tick_unit == 'epi' and not env.done:
        # in epi units, only save once the episode is done
        return
    if tick % env.save_frequency != 0:
        return

    agent.save(ckpt='last')
    if analysis.new_best(agent):
        agent.save(ckpt='best')
    analysis.analyze_session(self)
def run(self, time_limit):
    '''
    Run episodes until the time budget is used up, then analyze and close.

    Args:
        time_limit: budget in seconds. It is checked only between episodes,
            so an episode that is already running is not interrupted.

    Returns:
        A tuple of (self.data, self.agent), where self.data is the result of
        analysis.analyze_session(self).
    '''
    # Use the monotonic clock: wall-clock time can jump (NTP sync, manual
    # changes), which would cut the budget short or extend it.
    deadline = time.monotonic() + time_limit
    while time.monotonic() < deadline:
        self.run_episode()
    self.data = analysis.analyze_session(self)  # session fitness
    self.close()
    return self.data, self.agent
def run(self):
    '''
    Run the RL loop, analyze the eval dataframe, log its metrics, then close.

    Returns:
        The metrics returned by analysis.analyze_session for the 'eval' mode.
    '''
    self.run_rl()
    df_mode = 'eval'
    metrics = analysis.analyze_session(self.spec, self.agent.body.eval_df, df_mode)
    scalar_metrics = metrics['scalar']
    self.agent.body.log_metrics(scalar_metrics, df_mode)
    self.close()
    return metrics
def run(self):
    '''
    Run episodes until the env clock reaches env.max_tick, wait on any
    parallel eval, then analyze and close the session.

    Returns:
        self.data, the result of analysis.analyze_session(self).
    '''
    while True:
        env = self.env
        tick = env.clock.get(env.max_tick_unit)
        if tick >= env.max_tick:
            break
        self.run_episode()

    # NOTE(review): assumed to run once after the loop -- confirm.
    retro_analysis.try_wait_parallel_eval(self)

    # session fitness
    self.data = analysis.analyze_session(self)
    self.close()
    return self.data
def try_ckpt(self, agent, env):
    '''
    Try to checkpoint agent at the start, save_freq, and the end.

    A checkpoint is due when the lab is not in an eval mode, the tick has not
    passed env.max_tick, and the tick is a multiple of env.eval_frequency or
    equals env.max_tick. When ticks are counted in episodes ('epi'), env.done
    must also be set. At a checkpoint, eval runs in parallel if the spec's
    meta 'parallel_eval' is set and as an eval episode otherwise.
    '''
    # NOTE(review): the best-save and the analysis are assumed to be siblings
    # of the eval if/else -- confirm against the intended nesting.
    tick = env.clock.get(env.max_tick_unit)

    if not util.in_eval_lab_modes() and tick <= env.max_tick:
        is_due = (tick % env.eval_frequency == 0) or tick == env.max_tick
    else:
        is_due = False
    if env.max_tick_unit == 'epi':
        # extra condition for epi
        is_due = is_due and env.done
    if not is_due:
        return

    if self.spec['meta'].get('parallel_eval'):
        retro_analysis.run_parallel_eval(self, agent, env)
    else:
        self.run_eval_episode()

    if analysis.new_best(agent):
        agent.save(ckpt='best')

    if tick > 0:
        # nothing to analyze at start
        analysis.analyze_session(self, eager_analyze_trial=True)
def run(self):
    '''
    Run episodes until the env clock reaches env.max_tick, exiting early when
    all environments are solved outside the 'enjoy'/'eval' lab modes; then
    analyze and close the session.

    Returns:
        self.data, the result of analysis.analyze_session(self).
    '''
    while True:
        env = self.env
        if env.clock.get(env.max_tick_unit) >= env.max_tick:
            break
        self.run_episode()
        # the solved check is skipped entirely in 'enjoy' and 'eval' modes
        if util.get_lab_mode() in ('enjoy', 'eval'):
            continue
        if analysis.all_solved(self.agent):
            logger.info('All environments solved. Early exit.')
            break

    # session fitness
    self.data = analysis.analyze_session(self)
    self.close()
    return self.data
def run(self):
    '''
    Run episodes until the env clock reaches env.max_tick, exiting early when
    all environments are solved outside the 'enjoy'/'eval' lab modes. Then wait
    for a pending eval process, if any, before analyzing and closing.

    Returns:
        self.data, the result of analysis.analyze_session(self).
    '''
    while True:
        env = self.env
        if env.clock.get(env.max_tick_unit) >= env.max_tick:
            break
        self.run_episode()
        # the solved check is skipped entirely in 'enjoy' and 'eval' modes
        if util.get_lab_mode() in ('enjoy', 'eval'):
            continue
        if analysis.all_solved(self.agent):
            logger.info('All environments solved. Early exit.')
            break

    if self.eval_proc is not None:
        # wait for final eval before closing
        util.run_cmd_wait(self.eval_proc)

    # session fitness
    self.data = analysis.analyze_session(self)
    self.close()
    return self.data
def run(self):
    '''
    Run all episodes, analyze the session, then close it.

    Returns:
        self.data, the result of analysis.analyze_session(self).
    '''
    self.run_all_episodes()
    # session fitness
    session_data = analysis.analyze_session(self)
    self.data = session_data
    self.close()
    return self.data
def run(self):
    '''
    Run all episodes, analyze the session, then close it.

    analysis.analyze_session(self) is unpacked into two values, stored as
    self.df and self.fitness_df.

    Returns:
        A tuple of (self.df, self.fitness_df).
    '''
    self.run_all_episodes()
    session_df, session_fitness_df = analysis.analyze_session(self)
    self.df = session_df
    self.fitness_df = session_fitness_df
    self.close()
    return self.df, self.fitness_df
def run(self):
    '''
    Run all episodes, analyze the session with tmp_space_session_sub enabled,
    then close it.

    Returns:
        self.data, the result of the analysis.analyze_session call.
    '''
    self.run_all_episodes()
    # session fitness
    session_data = analysis.analyze_session(self, tmp_space_session_sub=True)
    self.data = session_data
    self.close()
    return self.data
def run(self):
    '''
    Run episodes until the env clock's 'epi' count reaches env.max_episode,
    then analyze and close the session.

    Returns:
        self.data, the result of analysis.analyze_session(self).
    '''
    while True:
        env = self.env
        if env.clock.get('epi') >= env.max_episode:
            break
        self.run_episode()

    # session fitness
    self.data = analysis.analyze_session(self)
    self.close()
    return self.data