def try_ckpt(self, agent, env):
    '''
    Check whether a log and/or eval checkpoint is due, then run it.

    Log checkpoint: when `self.to_ckpt(env, 'log')` is true and the episode count
    read from `self.env.clock` is past `self.warmup_epi`, run the body's train
    checkpoint and log the 'train' summary.

    Eval checkpoint: when `self.to_ckpt(env, 'eval')` is true, evaluate the agent on
    `self.eval_env` for `self.num_eval` episodes, record the result on the body,
    log the 'eval' summary, save the agent as 'best' if the eval reward moving
    average is at least the best seen so far, and (past warmup only) run session
    analysis on the train and eval dataframes.

    @param agent: the agent whose `body` is checkpointed; also used for evaluation and saving
    @param env: the environment passed to `self.to_ckpt` to decide if a checkpoint is due
    '''
    body = agent.body

    # NOTE(review): checkpoint timing is asked of the `env` argument, while the warmup
    # check reads the clock of `self.env` - confirm these are meant to be the same env.
    if self.to_ckpt(env, 'log') and self.env.clock.get('epi') > self.warmup_epi:
        body.train_ckpt()
        body.log_summary('train')

    if not self.to_ckpt(env, 'eval'):
        return

    # gen_avg_result yields 7 values; only the first three are consumed here.
    # The rest are unpacked under underscore names so the arity is still checked.
    (avg_return, avg_len, avg_success,
     _avg_p, _avg_r, _avg_f1, _avg_book_rate) = analysis.gen_avg_result(
        agent, self.eval_env, self.num_eval)
    body.eval_ckpt(self.eval_env, avg_return, avg_len, avg_success)
    body.log_summary('eval')

    # `>=` means a tie with the current best also re-saves the 'best' checkpoint
    if body.eval_reward_ma >= body.best_reward_ma:
        body.best_reward_ma = body.eval_reward_ma
        agent.save(ckpt='best')

    # session analysis is skipped until the warmup episodes are over
    if self.env.clock.get('epi') > self.warmup_epi:
        # the return value of analyze_session is not used in this method
        if len(body.train_df) > 1:  # need > 1 row to calculate stability
            analysis.analyze_session(self.spec, body.train_df, 'train')
        if len(body.eval_df) > 1:  # need > 1 row to calculate stability
            analysis.analyze_session(self.spec, body.eval_df, 'eval')
def run_eval(self):
    '''
    Evaluate `self.agent` on `self.eval_env` for `self.num_eval` episodes and log a one-line summary.

    The summary always contains the episode count and the average return. The other
    metrics are appended only when present:
    - success rate: when `avg_success` is not None (so a 0 success rate is still reported)
    - turns, P/R/F1, book rate: when the respective value is truthy (None and 0 are both omitted;
      P/R/F1 are gated on `avg_p` alone)
    '''
    avg_return, avg_len, avg_success, avg_p, avg_r, avg_f1, avg_book_rate = analysis.gen_avg_result(
        self.agent, self.eval_env, self.num_eval)

    parts = [f'{self.num_eval} episodes', f'{avg_return:.2f} return']
    if avg_success is not None:
        parts.append(f'{avg_success*100:.2f}% success rate')
    if avg_len:
        parts.append(f'{avg_len:.2f} turns')
    if avg_p:
        parts.append(f'{avg_p:.2f} P, {avg_r:.2f} R, {avg_f1:.2f} F1')
    if avg_book_rate:
        parts.append(f'{avg_book_rate*100:.2f}% book rate')

    # joined with ', ' so the logged message is identical to the concatenated form
    result = ', '.join(parts)
    logger.info(result)