def show(self):
    """Record the current batch's statistics into the running history,
    optionally refresh the diagnostic plots, and periodically print a
    textual summary.

    Called once per policy update.  Side effects: appends one value to
    every ``self.history`` series, may render the latest trajectory and
    redraw all plots (when ``self.allow_plotting``), prints stats every
    ``self.print_every`` updates, clears the per-batch accumulators via
    ``self.clear()``, and increments ``self.update_cnt``.
    """
    rewards = self.stats['rewards']
    self.history['MeanReward'].append(np.mean(rewards))
    self.history['StdReward'].append(np.std(rewards))
    self.history['MinReward'].append(np.min(rewards))

    # Optimizer diagnostics copied verbatim from the latest logger entry.
    for key in ('KL', 'Beta', 'Variance', 'PolicyEntropy', 'ExplainedVarNew'):
        self.history[key].append(self.logger.log_entry[key])
    self.history['Episode'].append(self.episode)

    # The four norm series (rf/vf/af/wf) share one Norm/SD/Max layout,
    # so record them in a loop instead of four copy-pasted stanzas.
    # NOTE: the original 'SD_*' entries were written as
    # np.mean(x + np.std(x)), which is algebraically mean(x) + std(x);
    # that intent is written out explicitly here (and avoids allocating
    # a temporary array).
    for suffix in ('rf', 'vf', 'af', 'wf'):
        norms = self.stats['norm_' + suffix]
        self.history['Norm_' + suffix].append(np.mean(norms))
        self.history['SD_' + suffix].append(np.mean(norms) + np.std(norms))
        self.history['Max_' + suffix].append(np.max(norms))

    self.history['MeanSteps'].append(np.mean(self.stats['steps']))
    self.history['MaxSteps'].append(np.max(self.stats['steps']))

    if self.allow_plotting:
        # Draw the most recent trajectory plus all history plots.
        envu.render_traj(self.lander.trajectory, vf=self.vf,
                         scaler=self.scaler)
        self.plot_rewards()
        self.plot_learning()
        self.plot_rf()
        self.plot_vf()
        self.plot_af()
        self.plot_wf()
    if self.update_cnt % self.print_every == 0:
        self.show_stats()
    self.clear()
    self.update_cnt += 1
def render(self):
    """Flush the episode statistics and, when plotting is enabled,
    draw the lander's most recent trajectory."""
    stats = self.rl_stats
    stats.show(self.episode)
    stats.clear()
    if not self.allow_plotting:
        return
    envu.render_traj(self.lander.trajectory)