예제 #1
0
 def log_diagnostics(self, paths, *args, **kwargs):
     """Log the outer and inner returns, then delegate to the wrapped env.

     Three things happen, in order:

     1. Under the ``Maze_`` tabular prefix, the per-path sum of
        ``env_infos['outer_rew']`` is recorded as the ``Return`` statistic.
     2. A shallow copy of every path is made whose ``observations`` keep
        only the first ``flat_dim(self.env.observation_space)`` columns.
     3. Under the ``wrapped_`` tabular prefix, the mean over paths of the
        summed ``env_infos['inner_rew']`` is recorded as ``AverageReturn``
        and ``self.env.log_diagnostics`` is called with the trimmed paths.

     Args:
         paths: Iterable of path mappings, each providing ``observations``
             and ``env_infos``.
         *args: Forwarded unchanged to ``self.env.log_diagnostics``.
         **kwargs: Forwarded unchanged to ``self.env.log_diagnostics``.
     """
     # Return computed purely from the outer reward signal.
     with logger.tabular_prefix('Maze_'):
         outer_returns = []
         for trajectory in paths:
             outer_returns.append(sum(trajectory['env_infos']['outer_rew']))
         logger.record_tabular_misc_stat(
             'Return', outer_returns, placement='front')

     # Build copies of the paths whose observations are cut down to the
     # wrapped env's flat observation width. The column slice assumes
     # 2-D observations (one flat vector per step); it breaks if the
     # robot's observations are more than one-dimensional.
     inner_paths = []
     for trajectory in paths:
         trimmed = dict(trajectory.items())
         full_obs = trimmed['observations']
         trimmed['observations'] = full_obs[
             :, :flat_dim(self.env.observation_space)]
         inner_paths.append(trimmed)

     with logger.tabular_prefix('wrapped_'):
         inner_returns = [
             np.sum(trajectory['env_infos']['inner_rew'])
             for trajectory in paths
         ]
         mean_inner_return = np.mean(inner_returns)
         logger.record_tabular('AverageReturn', mean_inner_return)
         self.env.log_diagnostics(inner_paths, *args, **kwargs)
예제 #2
0
 def log_diagnostics(self, paths):
     """Record summary statistics of per-path forward progress.

     For every path, progress is the difference between the third-from-last
     entry of the final observation and the same entry of the first
     observation. The mean, max, min and standard deviation over all paths
     are written to the tabular logger.

     Args:
         paths: Iterable of path mappings, each providing ``observations``.
     """
     displacements = []
     for trajectory in paths:
         obs = trajectory["observations"]
         # Index -3 of the observation is compared between the last and
         # first step of the path.
         displacements.append(obs[-1][-3] - obs[0][-3])

     # Ordered (key, reducer) pairs; the order fixes the logging order.
     summaries = (
         ('AverageForwardProgress', np.mean),
         ('MaxForwardProgress', np.max),
         ('MinForwardProgress', np.min),
         ('StdForwardProgress', np.std),
     )
     for key, reduce_fn in summaries:
         logger.record_tabular(key, reduce_fn(displacements))