Example #1
0
    def show(self):
        """Append this update's summary statistics to ``self.history``.

        Reads the per-update samples in ``self.stats`` and the values in
        ``self.logger.log_entry``, appends one entry per tracked key to
        ``self.history``, draws the plots when ``self.allow_plotting`` is
        truthy, and finally bumps ``self.update_cnt``.  Whenever
        ``self.update_cnt`` is a multiple of ``self.print_every`` (checked
        before the increment) ``show_stats()`` and ``clear()`` are called.
        """
        history = self.history
        stats = self.stats

        # Reward summary.
        history['MeanReward'].append(np.mean(stats['rewards']))
        history['StdReward'].append(np.std(stats['rewards']))
        history['MinReward'].append(np.min(stats['rewards']))

        # Values copied verbatim from the logger's latest entry.
        log_entry = self.logger.log_entry
        logged_keys = (
            'KL',
            'Beta',
            'Variance',
            'PolicyEntropy',
            'ExplainedVarNew',
        )
        for key in logged_keys:
            history[key].append(log_entry[key])
        history['Episode'].append(self.episode)

        # (stats key, mean key, "SD" key, max key) for each tracked norm.
        norm_groups = (
            ('norm_rf', 'Norm_rf', 'SD_rf', 'Max_rf'),
            ('norm_vf', 'Norm_vf', 'SD_vf', 'Max_vf'),
            ('norm_af', 'Norm_af', 'SD_af', 'Max_af'),
            ('norm_wf', 'Norm_wf', 'SD_wf', 'Max_wf'),
        )
        for stats_key, mean_key, sd_key, max_key in norm_groups:
            history[mean_key].append(np.mean(stats[stats_key]))
            # NOTE(review): this stores mean(values + std), i.e. mean plus
            # one standard deviation, not the standard deviation alone --
            # confirm that is what the 'SD_*' series is meant to hold.
            history[sd_key].append(
                np.mean(stats[stats_key] + np.std(stats[stats_key])))
            history[max_key].append(np.max(stats[stats_key]))

        # Episode length summary.
        history['MeanSteps'].append(np.mean(stats['steps']))
        history['MaxSteps'].append(np.max(stats['steps']))

        if self.allow_plotting:
            envu.render_traj(
                self.lander.trajectory, vf=self.vf, scaler=self.scaler)
            plotters = (
                self.plot_rewards,
                self.plot_learning,
                self.plot_rf,
                self.plot_vf,
                self.plot_af,
                self.plot_wf,
            )
            for draw in plotters:
                draw()

        report_due = self.update_cnt % self.print_every == 0
        if report_due:
            self.show_stats()
            self.clear()
        self.update_cnt += 1
Example #2
0
 def render(self):
     """Report and reset the RL statistics, then optionally draw the trajectory.

     Calls ``self.rl_stats.show`` with the current episode followed by
     ``self.rl_stats.clear``.  The lander trajectory is rendered only
     when ``self.allow_plotting`` is truthy.
     """
     stats = self.rl_stats
     stats.show(self.episode)
     stats.clear()

     # Nothing left to do when plotting is disabled.
     if not self.allow_plotting:
         return
     envu.render_traj(self.lander.trajectory)