def log_diagnostics(self, paths, *args, **kwargs):
    """Log maze-level and wrapped-environment return statistics.

    Three things happen, in this order:

    1. Under the ``Maze_`` tabular prefix, the per-path sum of
       ``env_infos['outer_rew']`` is recorded as the ``Return`` statistic.
    2. Each path is shallow-copied and its ``'observations'`` entry is cut
       down to the first ``flat_dim(self.env.observation_space)`` columns.
    3. Under the ``wrapped_`` tabular prefix, the mean per-path sum of
       ``env_infos['inner_rew']`` is recorded as ``AverageReturn`` and the
       trimmed paths are handed to ``self.env.log_diagnostics``.

    Args:
        paths: Sequence of path dicts. Each is read for
            ``path['env_infos']['outer_rew']``,
            ``path['env_infos']['inner_rew']`` and ``path['observations']``
            (indexed as ``[:, :k]``, so it must support 2-D slicing).
        *args: Passed through unchanged to ``self.env.log_diagnostics``.
        **kwargs: Passed through unchanged to ``self.env.log_diagnostics``.
    """
    # The outer reward is logged on its own, from the untouched paths.
    with logger.tabular_prefix('Maze_'):
        outer_returns = [
            sum(trajectory['env_infos']['outer_rew']) for trajectory in paths
        ]
        logger.record_tabular_misc_stat(
            'Return', outer_returns, placement='front')

    # Build copies whose observations only keep the leading columns that
    # belong to the wrapped env, so its own diagnostics can index them.
    stripped_paths = []
    for trajectory in paths:
        trimmed = {key: value for key, value in trajectory.items()}
        # Column slicing only: this breaks if the wrapped env's
        # observations are more than one-dimensional (not a flat vector).
        trimmed['observations'] = trimmed['observations'][
            :, :flat_dim(self.env.observation_space)]
        stripped_paths.append(trimmed)

    # NOTE(review): the source formatting was collapsed; the wrapped env's
    # diagnostics are assumed to be emitted inside the 'wrapped_' prefix.
    # Confirm this nesting against the upstream file.
    with logger.tabular_prefix('wrapped_'):
        inner_returns = [
            np.sum(trajectory['env_infos']['inner_rew'])
            for trajectory in paths
        ]
        wrapped_undiscounted_return = np.mean(inner_returns)
        logger.record_tabular('AverageReturn', wrapped_undiscounted_return)
        self.env.log_diagnostics(stripped_paths, *args, **kwargs)
def log_diagnostics(self, paths):
    """Record summary statistics of per-path forward progress.

    For every path, progress is the third-from-last entry of the final
    observation minus the third-from-last entry of the first observation.
    The mean, maximum, minimum and standard deviation of those values are
    written to the tabular logger, in that order.

    Args:
        paths: Sequence of path dicts, each with an ``"observations"``
            entry that can be indexed as ``obs[t][-3]``.
    """
    forward_deltas = []
    for path in paths:
        observations = path["observations"]
        # Last step minus first step, both read at index -3.
        forward_deltas.append(observations[-1][-3] - observations[0][-3])

    # (tabular key, reducer) pairs, emitted in this exact order.
    summaries = (
        ('AverageForwardProgress', np.mean),
        ('MaxForwardProgress', np.max),
        ('MinForwardProgress', np.min),
        ('StdForwardProgress', np.std),
    )
    for label, reducer in summaries:
        logger.record_tabular(label, reducer(forward_deltas))