예제 #1
0
    def try_ckpt(self, agent, env):
        '''Check then run checkpoint log/eval

        Two checkpoints are considered, each gated by `self.to_ckpt(env, mode)`:

        - 'log': if the episode counter read from `self.env.clock` is also past
          `self.warmup_epi`, run `body.train_ckpt()` and log the 'train' summary.
        - 'eval': call `analysis.gen_avg_result` with `self.eval_env` and
          `self.num_eval`, record the result through `body.eval_ckpt`, log the
          'eval' summary, save the agent as 'best' when the eval reward moving
          average is at least the best seen so far, and, once past warmup, run
          `analysis.analyze_session` on the train and eval dataframes.

        Args:
            agent: object exposing `body` and `save(ckpt=...)`.
            env: environment handed to `self.to_ckpt`.

        Returns:
            None.
        '''
        body = agent.body
        # NOTE(review): the warmup checks read `self.env.clock`, not the `env`
        # argument; presumably both are the same object -- confirm with callers.
        if self.to_ckpt(env, 'log') and \
                self.env.clock.get('epi') > self.warmup_epi:
            body.train_ckpt()
            body.log_summary('train')

        if not self.to_ckpt(env, 'eval'):
            return

        # gen_avg_result yields seven values; only the first three are consumed
        # here, the rest are unpacked (and underscore-prefixed) to keep the
        # strict 7-tuple shape check.
        (avg_return, avg_len, avg_success,
         _avg_p, _avg_r, _avg_f1, _avg_book_rate) = analysis.gen_avg_result(
             agent, self.eval_env, self.num_eval)
        body.eval_ckpt(self.eval_env, avg_return, avg_len, avg_success)
        body.log_summary('eval')

        # `>=` means a tie with the current best also overwrites the saved
        # 'best' checkpoint.
        if body.eval_reward_ma >= body.best_reward_ma:
            body.best_reward_ma = body.eval_reward_ma
            agent.save(ckpt='best')

        if self.env.clock.get('epi') > self.warmup_epi:
            # need > 1 row to calculate stability
            if len(body.train_df) > 1:
                analysis.analyze_session(self.spec, body.train_df, 'train')
            if len(body.eval_df) > 1:
                analysis.analyze_session(self.spec, body.eval_df, 'eval')
예제 #2
0
 def run_eval(self):
     '''Evaluate the agent and log a one-line summary of the averages.

     Calls `analysis.gen_avg_result(self.agent, self.eval_env, self.num_eval)`
     and formats the seven returned values into a single comma-separated
     message sent to `logger.info`. The episode count and average return are
     always reported; every other figure is appended only when present:

     - success rate: whenever it is not None (so 0 is still reported);
     - turns, P/R/F1, book rate: only when the value is truthy, i.e. None
       and 0 are both omitted.

     Returns:
         None.
     '''
     (avg_return, avg_len, avg_success, avg_p, avg_r, avg_f1,
      avg_book_rate) = analysis.gen_avg_result(
          self.agent, self.eval_env, self.num_eval)

     parts = [f'{self.num_eval} episodes, {avg_return:.2f} return']
     if avg_success is not None:
         parts.append(f'{avg_success*100:.2f}% success rate')
     if avg_len:
         parts.append(f'{avg_len:.2f} turns')
     if avg_p:
         # P, R and F1 are reported together, gated on avg_p alone.
         parts.append(f'{avg_p:.2f} P, {avg_r:.2f} R, {avg_f1:.2f} F1')
     if avg_book_rate:
         parts.append(f'{avg_book_rate*100:.2f}% book rate')
     logger.info(', '.join(parts))