def log_diagnostics(self, paths, log_prefix='Gather', *args, **kwargs):
    """Log gather-level returns, then delegate to the wrapped env.

    Three things happen, in order:

    1. Under the tabular prefix ``log_prefix + '_'``, the per-path sums of
       ``path['env_infos']['outer_rew']`` are handed to
       ``logger.record_tabular_misc_stat`` with key ``'Return'``.
    2. A shallow copy of every path is made whose ``'observations'`` entry
       keeps only the first ``flat_dim(self.wrapped_env.observation_space)``
       columns, i.e. the part the wrapped env itself produced.
    3. Under the tabular prefix ``'wrapped_'``, the mean per-path sum of
       ``inner_rew`` is recorded as ``'AverageReturn'`` (only when the
       first path carries ``inner_rew``), and the stripped paths are passed
       to ``self.wrapped_env.log_diagnostics``.

    Args:
        paths: Sequence of path dicts. Each must provide
            ``path['env_infos']['outer_rew']`` and an ``'observations'``
            entry that supports ``[:, :n]`` indexing. May be empty.
        log_prefix: Prefix (an underscore is appended) for the gather-level
            statistics.
        *args: Accepted for interface compatibility; not forwarded.
        **kwargs: Accepted for interface compatibility; not forwarded.
    """
    # Gather-level (outer) reward only.
    with logger.tabular_prefix(log_prefix + '_'):
        gather_undiscounted_returns = [
            sum(path['env_infos']['outer_rew']) for path in paths
        ]
        logger.record_tabular_misc_stat(
            'Return', gather_undiscounted_returns, placement='front')

    # The width of the wrapped env's observation does not depend on the
    # path, so compute it once.
    wrapped_obs_dim = flat_dim(self.wrapped_env.observation_space)

    stripped_paths = []
    for path in paths:
        # Shallow copy: the caller's path dict must keep its full
        # observations.
        stripped_path = dict(path)
        # NOTE: this breaks if the obs of the robot are d>1 dimensional
        # (not a vector).
        stripped_path['observations'] = \
            stripped_path['observations'][:, :wrapped_obs_dim]
        stripped_paths.append(stripped_path)

    with logger.tabular_prefix('wrapped_'):
        # Guard against an empty batch: indexing paths[0] would raise
        # IndexError. Only the first path is inspected for 'inner_rew'.
        if (paths and 'env_infos' in paths[0]
                and 'inner_rew' in paths[0]['env_infos']):
            wrapped_undiscounted_return = np.mean(
                [np.sum(path['env_infos']['inner_rew']) for path in paths])
            logger.record_tabular('AverageReturn',
                                  wrapped_undiscounted_return)
        # Extra positional/keyword arguments are deliberately not passed
        # on; the wrapped env's signature is not known here.
        # See swimmer_env.py for a sketch of the maze plotting.
        self.wrapped_env.log_diagnostics(stripped_paths)
def log_diagnostics(self, paths, *args, **kwargs):
    """Log maze-level returns, then delegate to the inner env.

    Under the ``'Maze_'`` tabular prefix the per-path sums of
    ``path['env_infos']['outer_rew']`` are handed to
    ``logger.record_tabular_misc_stat`` with key ``'Return'``. Each path is
    then shallow-copied with its ``'observations'`` cut down to the first
    ``flat_dim(self.env.observation_space)`` columns. Finally, under the
    ``'wrapped_'`` prefix, the mean per-path sum of ``inner_rew`` is
    recorded as ``'AverageReturn'`` and the trimmed paths, together with
    any extra arguments, are forwarded to ``self.env.log_diagnostics``.

    Args:
        paths: Sequence of path dicts providing
            ``path['env_infos']['outer_rew']``,
            ``path['env_infos']['inner_rew']`` and an ``'observations'``
            entry that supports ``[:, :n]`` indexing.
        *args: Forwarded to ``self.env.log_diagnostics``.
        **kwargs: Forwarded to ``self.env.log_diagnostics``.
    """
    # Maze-level (outer) reward only.
    with logger.tabular_prefix('Maze_'):
        outer_returns = []
        for trajectory in paths:
            outer_returns.append(sum(trajectory['env_infos']['outer_rew']))
        logger.record_tabular_misc_stat(
            'Return', outer_returns, placement='front')

    inner_env_paths = []
    for trajectory in paths:
        # Shallow copy so the caller's path keeps its full observations.
        trimmed = dict(trajectory.items())
        full_obs = trimmed['observations']
        inner_obs_dim = flat_dim(self.env.observation_space)
        # NOTE: this breaks if the obs of the robot are d>1 dimensional
        # (not a vector).
        trimmed['observations'] = full_obs[:, :inner_obs_dim]
        inner_env_paths.append(trimmed)

    with logger.tabular_prefix('wrapped_'):
        inner_returns = [
            np.sum(trajectory['env_infos']['inner_rew'])
            for trajectory in paths
        ]
        logger.record_tabular('AverageReturn', np.mean(inner_returns))
        self.env.log_diagnostics(inner_env_paths, *args, **kwargs)