def compute_path_info(infos):
    """Summarise per-episode env infos into an ordered dict of statistics.

    Every element of ``infos`` is converted with
    ``ppp.list_of_dicts__to__dict_of_lists``. For each key of the first
    converted episode, three groups of statistics are produced through
    ``create_stats_ordered_dict``:

    * ``env_infos/final/``   -- the last value of every episode,
    * ``env_infos/initial/`` -- the first value of every episode,
    * ``env_infos/``         -- all values of all episodes concatenated.

    Args:
        infos: Sequence of episodes; each episode is a list of per-step info
            dicts (presumably the env infos of one rollout -- confirm
            against the caller).

    Returns:
        OrderedDict: Statistics in insertion order. Empty when ``infos``
        contains no episodes.
    """
    statistics = OrderedDict()
    all_env_infos = [
        ppp.list_of_dicts__to__dict_of_lists(ep_info) for ep_info in infos
    ]
    if not all_env_infos:
        # No episodes collected: report nothing instead of failing on the
        # ``all_env_infos[0]`` lookup below.
        return statistics

    # Keys are taken from the first episode only; every other episode is
    # expected to expose the same keys.
    for k in all_env_infos[0].keys():
        final_ks = np.array([info[k][-1] for info in all_env_infos])
        first_ks = np.array([info[k][0] for info in all_env_infos])
        all_ks = np.concatenate([info[k] for info in all_env_infos])
        groups = (
            ("{}/final/".format("env_infos"), final_ks),
            ("{}/initial/".format("env_infos"), first_ks),
            ("{}/".format("env_infos"), all_ks),
        )
        for group_prefix, values in groups:
            statistics.update(
                create_stats_ordered_dict(k, values, stat_prefix=group_prefix)
            )
    return statistics
def process_samples(self, paths):
    r"""Convert collected paths into padded, batched tensors.

    Notes: N is the number of paths and P is the maximum path length
    (self.max_path_length). Every per-path sequence is passed through
    ``pad_to_last`` with ``total_length=P`` before being stacked and moved
    to ``ptu.device``.

    When ``self.exploration_bonus`` is positive the paths are first passed
    through ``self._add_exploration_bonus``.

    Args:
        paths (list[dict]): Collected paths. Each path is read for the keys
            ``'observations'``, ``'actions'``, ``'rewards'`` and
            ``'env_infos'``; the env infos must provide ``'sup_labels'``.

    Returns:
        tuple: ``(obs, actions, rewards, returns, valids, baselines,
        labels)`` where

        * obs (torch.Tensor): padded observations, documented upstream as
          :math:`(N, P, O*)`.
        * actions (torch.Tensor): padded actions, documented upstream as
          :math:`(N, P, A*)`.
        * rewards (torch.Tensor): flattened, padded rewards,
          :math:`(N, P)`.
        * returns (torch.Tensor): padded result of ``tu.discount_cumsum``
          on the flattened rewards with ``self.discount``.
        * valids (torch.Tensor): int tensor with ``len(path['actions'])``
          for every path.
        * baselines (torch.Tensor): ``self._value_function(obs)`` with the
          last dimension squeezed, computed without gradient tracking.
        * labels (torch.Tensor): padded ``'sup_labels'`` from the env
          infos (batch x label_num x label_dim according to the original
          author's note -- confirm).
    """
    if self.exploration_bonus > 0.:
        paths = self._add_exploration_bonus(paths)

    horizon = self.max_path_length
    device = ptu.device

    def batch(padded_sequences):
        # Stack the per-path tensors along a new leading axis and move the
        # result to the training device.
        return torch.stack(padded_sequences).to(device)

    step_counts = [len(p['actions']) for p in paths]
    valids = torch.Tensor(step_counts).int().to(device)

    obs = batch([
        pad_to_last(p['observations'], total_length=horizon, axis=0)
        for p in paths
    ])
    actions = batch([
        pad_to_last(p['actions'], total_length=horizon, axis=0)
        for p in paths
    ])
    rewards = batch([
        pad_to_last(p['rewards'].reshape(-1), total_length=horizon)
        for p in paths
    ])
    returns = batch([
        pad_to_last(
            tu.discount_cumsum(p['rewards'].reshape(-1),
                               self.discount).copy(),
            total_length=horizon)
        for p in paths
    ])

    # batch x label_num x label_dim
    env_infos = [
        ppp.list_of_dicts__to__dict_of_lists(p['env_infos']) for p in paths
    ]
    labels = batch([
        pad_to_last(info['sup_labels'], total_length=horizon, axis=0)
        for info in env_infos
    ])

    with torch.no_grad():
        baselines = self._value_function(obs).squeeze(-1)

    return obs, actions, rewards, returns, valids, baselines, labels
def get_generic_path_information(paths, stat_prefix=''):
    """
    Collect reward, return, action, timing and info statistics for paths.

    The result contains, in this order: statistics for 'Rewards', 'Returns'
    and 'Actions' (from ``create_stats_ordered_dict``), 'Num Paths' (stored
    without ``stat_prefix``), the prefixed 'Average Returns', the prefixed
    'Average Time', 'Average Solve Time' and 'Number of Solves' obtained
    from ``get_average_time_and_num_solve``, and finally final / initial /
    overall statistics for every key of 'env_infos' and 'agent_infos' when
    the first path carries them.

    :param paths: non-empty list of path dicts with at least 'rewards' and
        'actions' entries.
    :param stat_prefix: string prepended to the statistic names.
    :return: OrderedDict of statistic names and values.
    """
    stats = OrderedDict()

    path_returns = [sum(p["rewards"]) for p in paths]
    stacked_rewards = np.vstack([p["rewards"] for p in paths])
    stats.update(
        create_stats_ordered_dict('Rewards', stacked_rewards,
                                  stat_prefix=stat_prefix))
    stats.update(
        create_stats_ordered_dict('Returns', path_returns,
                                  stat_prefix=stat_prefix))

    # 1-D action arrays are joined end to end, anything else row-wise.
    per_path_actions = [p["actions"] for p in paths]
    if len(per_path_actions[0].shape) == 1:
        merged_actions = np.hstack(per_path_actions)
    else:
        merged_actions = np.vstack(per_path_actions)
    stats.update(
        create_stats_ordered_dict('Actions', merged_actions,
                                  stat_prefix=stat_prefix))

    stats['Num Paths'] = len(paths)
    stats[stat_prefix + 'Average Returns'] = get_average_returns(paths)

    avg_time, num_solves, avg_solve_time = get_average_time_and_num_solve(
        paths)
    stats[stat_prefix + 'Average Time'] = avg_time
    stats[stat_prefix + 'Average Solve Time'] = avg_solve_time
    stats[stat_prefix + 'Number of Solves'] = num_solves

    for info_key in ('env_infos', 'agent_infos'):
        if info_key not in paths[0]:
            continue
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for k in per_path_infos[0].keys():
            groups = (
                ('{}/final/'.format(info_key),
                 np.array([info[k][-1] for info in per_path_infos])),
                ('{}/initial/'.format(info_key),
                 np.array([info[k][0] for info in per_path_infos])),
                ('{}/'.format(info_key),
                 np.concatenate([info[k] for info in per_path_infos])),
            )
            for group_prefix, values in groups:
                stats.update(
                    create_stats_ordered_dict(
                        stat_prefix + k,
                        values,
                        stat_prefix=group_prefix,
                    ))

    return stats
def get_generic_path_information(paths, stat_prefix=""):
    """
    Collect reward, return, action, exploration and agent-info statistics.

    The result contains, in this order: statistics for "Rewards", "Returns"
    and "Actions" (from ``create_stats_ordered_dict``), "Num Paths" and
    "Proportion exploration" (both stored without ``stat_prefix``), the
    prefixed "Average Returns", and final / initial / overall statistics for
    every key of "agent_infos" when the first path carries it.

    "Proportion exploration" is the summed "explored" entries of all paths
    (first component of the summed value) divided by the total number of
    "explored" entries.

    :param paths: non-empty list of path dicts with "rewards", "actions"
        and "explored" entries.
    :param stat_prefix: string prepended to the statistic names.
    :return: OrderedDict of statistic names and values.
    """
    stats = OrderedDict()

    path_returns = [sum(p["rewards"]) for p in paths]
    stacked_rewards = np.vstack([p["rewards"] for p in paths])
    stats.update(
        create_stats_ordered_dict("Rewards", stacked_rewards,
                                  stat_prefix=stat_prefix))
    stats.update(
        create_stats_ordered_dict("Returns", path_returns,
                                  stat_prefix=stat_prefix))

    per_path_actions = [p["actions"] for p in paths]
    is_flat = len(per_path_actions[0].shape) == 1
    join_actions = np.hstack if is_flat else np.vstack
    stats.update(
        create_stats_ordered_dict("Actions", join_actions(per_path_actions),
                                  stat_prefix=stat_prefix))

    stats["Num Paths"] = len(paths)

    # The summed "explored" value is indexed with [0], so each entry is
    # expected to be array-like rather than a plain scalar.
    explored_total = sum([sum(p["explored"]) for p in paths])[0]
    explored_steps = sum([len(p["explored"]) for p in paths])
    stats["Proportion exploration"] = explored_total / explored_steps

    stats[stat_prefix + "Average Returns"] = get_average_returns(paths)

    for info_key in ("agent_infos",):
        if info_key not in paths[0]:
            continue
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for k in per_path_infos[0].keys():
            last_values = np.array([info[k][-1] for info in per_path_infos])
            first_values = np.array([info[k][0] for info in per_path_infos])
            every_value = np.concatenate(
                [info[k] for info in per_path_infos])
            for group_prefix, values in (
                ("{}/final/".format(info_key), last_values),
                ("{}/initial/".format(info_key), first_values),
                ("{}/".format(info_key), every_value),
            ):
                stats.update(
                    create_stats_ordered_dict(
                        stat_prefix + k,
                        values,
                        stat_prefix=group_prefix,
                    ))

    return stats
def get_traffic_path_information(paths, stat_prefix=''):
    """
    Get an OrderedDict with a bunch of statistic names and values.

    In addition to reward / return / action statistics, every path is
    classified by the ``'event'`` entry of its last env info and tallied
    into 'Num Collision', 'Num Block', 'Num Outroad', 'Num Success'
    (event ``'goal'``) or 'Num Timeout' (any other event). Final / initial /
    overall statistics are added for every key of 'agent_infos' when the
    first path carries it.

    'Num Paths' and the five outcome counters are stored without
    ``stat_prefix``.

    :param paths: non-empty list of path dicts with 'rewards', 'actions'
        and 'env_infos' entries; the last env info of each path must
        contain 'event'.
    :param stat_prefix: string prepended to the statistic names.
    :return: OrderedDict of statistic names and values.
    """
    statistics = OrderedDict()
    returns = [sum(path["rewards"]) for path in paths]
    rewards = np.vstack([path["rewards"] for path in paths])
    statistics.update(
        create_stats_ordered_dict('Rewards', rewards,
                                  stat_prefix=stat_prefix))
    statistics.update(
        create_stats_ordered_dict('Returns', returns,
                                  stat_prefix=stat_prefix))

    actions = [path["actions"] for path in paths]
    if len(actions[0].shape) == 1:
        actions = np.hstack(actions)
    else:
        actions = np.vstack(actions)
    statistics.update(
        create_stats_ordered_dict('Actions', actions,
                                  stat_prefix=stat_prefix))

    statistics['Num Paths'] = len(paths)
    statistics[stat_prefix + 'Average Returns'] = get_average_returns(paths)

    # Terminal event -> counter it increments. Declaration order is the
    # order in which the counters appear in the result.
    event_stats = (
        ('collision', 'Num Collision'),
        ('block', 'Num Block'),
        ('outroad', 'Num Outroad'),
        ('goal', 'Num Success'),
    )
    timeout_stat = 'Num Timeout'
    outcome_counts = OrderedDict(
        (stat_name, 0) for _, stat_name in event_stats)
    outcome_counts[timeout_stat] = 0
    for path in paths:
        event = path["env_infos"][-1]['event']
        # Anything that is not a known terminal event counts as a timeout.
        stat_name = next(
            (name for known, name in event_stats if event == known),
            timeout_stat,
        )
        outcome_counts[stat_name] += 1
    statistics.update(outcome_counts)

    for info_key in ['agent_infos']:
        if info_key not in paths[0]:
            continue
        all_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for k in all_infos[0].keys():
            final_ks = np.array([info[k][-1] for info in all_infos])
            first_ks = np.array([info[k][0] for info in all_infos])
            all_ks = np.concatenate([info[k] for info in all_infos])
            for group_prefix, values in (
                ('{}/final/'.format(info_key), final_ks),
                ('{}/initial/'.format(info_key), first_ks),
                ('{}/'.format(info_key), all_ks),
            ):
                statistics.update(
                    create_stats_ordered_dict(
                        stat_prefix + k,
                        values,
                        stat_prefix=group_prefix,
                    ))

    return statistics
def get_ma_path_information(paths, stat_prefix=''):
    """
    Collect per-agent and global statistics for multi-agent paths.

    The number of agents is read from axis 1 of the first path's rewards.
    For every agent index the result receives statistics for
    'Rewards <i>', 'Returns <i>' and 'Actions <i>' plus the prefixed
    'Average Returns <i>' from ``get_ma_average_returns``. Afterwards
    'Num Paths' and 'Success Rate' (both without ``stat_prefix``) are
    stored, followed by final / initial / overall statistics for every key
    of 'env_infos' and 'agent_infos' when the first path carries them.

    A path counts as a success when all values of its last 'terminals'
    entry are true.

    :param paths: non-empty list of path dicts whose 'rewards' and
        'actions' are indexed as ``[:, agent, :]``.
    :param stat_prefix: string prepended to the statistic names.
    :return: OrderedDict of statistic names and values.
    """
    agent_count = paths[0]["rewards"].shape[1]
    stats = OrderedDict()

    for agent in range(agent_count):
        agent_returns = [sum(p["rewards"][:, agent, :]) for p in paths]
        agent_rewards = np.vstack([p["rewards"][:, agent, :] for p in paths])
        stats.update(
            create_stats_ordered_dict('Rewards {}'.format(agent),
                                      agent_rewards,
                                      stat_prefix=stat_prefix))
        stats.update(
            create_stats_ordered_dict('Returns {}'.format(agent),
                                      agent_returns,
                                      stat_prefix=stat_prefix))

        agent_actions = np.vstack([p["actions"][:, agent, :] for p in paths])
        stats.update(
            create_stats_ordered_dict('Actions {}'.format(agent),
                                      agent_actions,
                                      stat_prefix=stat_prefix))

        average_key = stat_prefix + 'Average Returns {}'.format(agent)
        stats[average_key] = get_ma_average_returns(paths, agent)

    path_count = len(paths)
    stats['Num Paths'] = path_count

    success_count = sum(1 for p in paths if p['terminals'][-1].all())
    stats['Success Rate'] = float(success_count) / float(path_count)

    for info_key in ('env_infos', 'agent_infos'):
        if info_key not in paths[0]:
            continue
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for k in per_path_infos[0].keys():
            last_values = np.array([info[k][-1] for info in per_path_infos])
            first_values = np.array([info[k][0] for info in per_path_infos])
            every_value = np.concatenate(
                [info[k] for info in per_path_infos])
            for group_prefix, values in (
                ('{}/final/'.format(info_key), last_values),
                ('{}/initial/'.format(info_key), first_values),
                ('{}/'.format(info_key), every_value),
            ):
                stats.update(
                    create_stats_ordered_dict(
                        stat_prefix + k,
                        values,
                        stat_prefix=group_prefix,
                    ))

    return stats
def get_custom_generic_path_information(paths, path_length, reward_scale,
                                        stat_prefix=''):
    """
    Get an OrderedDict with a bunch of statistic names and values.

    Differs from the normal rlkit utility function by additionally
    reporting 'ExplReturns': the reward of every path summed over its first
    @path_length timesteps only.

    NOTE: @reward_scale is accepted for interface compatibility but is not
    used by the current implementation; no reward normalisation is
    performed here.

    The result contains, in this order: statistics for 'Rewards',
    'Returns', 'ExplReturns' and 'Actions', 'Num Paths' (without
    @stat_prefix), the prefixed 'Average Returns', and final / initial /
    overall statistics for every key of 'env_infos' and 'agent_infos' when
    the first path carries them.

    :param paths: non-empty list of path dicts with 'rewards' and 'actions'.
    :param path_length: number of leading timesteps summed for 'ExplReturns'.
    :param reward_scale: unused.
    :param stat_prefix: string prepended to the statistic names.
    :return: OrderedDict of statistic names and values.
    """
    make_stats = eval_util.create_stats_ordered_dict
    stats = OrderedDict()

    full_returns = [sum(p["rewards"]) for p in paths]
    # Returns accumulated up to the specified timestep.
    truncated_returns = [sum(p["rewards"][:path_length]) for p in paths]
    stacked_rewards = np.vstack([p["rewards"] for p in paths])

    headline = (
        ('Rewards', stacked_rewards),
        ('Returns', full_returns),
        ('ExplReturns', truncated_returns),
    )
    for stat_name, values in headline:
        stats.update(make_stats(stat_name, values, stat_prefix=stat_prefix))

    per_path_actions = [p["actions"] for p in paths]
    if len(per_path_actions[0].shape) == 1:
        merged_actions = np.hstack(per_path_actions)
    else:
        merged_actions = np.vstack(per_path_actions)
    stats.update(make_stats('Actions', merged_actions,
                            stat_prefix=stat_prefix))

    stats['Num Paths'] = len(paths)
    stats[stat_prefix + 'Average Returns'] = \
        eval_util.get_average_returns(paths)

    for info_key in ('env_infos', 'agent_infos'):
        if info_key not in paths[0]:
            continue
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for k in per_path_infos[0].keys():
            last_values = np.array([info[k][-1] for info in per_path_infos])
            first_values = np.array([info[k][0] for info in per_path_infos])
            every_value = np.concatenate(
                [info[k] for info in per_path_infos])
            for group_prefix, values in (
                ('{}/final/'.format(info_key), last_values),
                ('{}/initial/'.format(info_key), first_values),
                ('{}/'.format(info_key), every_value),
            ):
                stats.update(
                    make_stats(
                        stat_prefix + k,
                        values,
                        stat_prefix=group_prefix,
                    ))

    return stats
def get_generic_path_information(paths, stat_prefix=''):
    """
    Collect reward, return, action and per-path info statistics.

    The result contains, in this order: statistics for 'Rewards', 'Returns'
    and 'Actions' (from ``create_stats_ordered_dict``), 'Num Paths' (stored
    without ``stat_prefix``) and the prefixed 'Average Returns'. For every
    key of 'env_infos' and 'agent_infos' (when the first path carries them)
    the values of each path are reduced with ``np.sum`` and ``np.mean``,
    and statistics over those per-path sums and means are stored under
    '<info_key>/sum/' and '<info_key>/average/'.

    :param paths: non-empty list of path dicts with 'rewards' and 'actions'.
    :param stat_prefix: string prepended to the statistic names.
    :return: OrderedDict of statistic names and values.
    """
    stats = OrderedDict()

    path_returns = [sum(p["rewards"]) for p in paths]
    stacked_rewards = np.vstack([p["rewards"] for p in paths])
    stats.update(
        create_stats_ordered_dict('Rewards', stacked_rewards,
                                  stat_prefix=stat_prefix))
    stats.update(
        create_stats_ordered_dict('Returns', path_returns,
                                  stat_prefix=stat_prefix))

    per_path_actions = [p["actions"] for p in paths]
    is_flat = len(per_path_actions[0].shape) == 1
    merged_actions = (np.hstack(per_path_actions) if is_flat
                      else np.vstack(per_path_actions))
    stats.update(
        create_stats_ordered_dict('Actions', merged_actions,
                                  stat_prefix=stat_prefix))

    stats['Num Paths'] = len(paths)
    stats[stat_prefix + 'Average Returns'] = get_average_returns(paths)

    # rlkit path info, reduced to one sum and one mean per path
    for info_key in ('env_infos', 'agent_infos'):
        if info_key not in paths[0]:
            continue
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for k in per_path_infos[0].keys():
            per_path_sums = [np.sum(info[k]) for info in per_path_infos]
            per_path_means = [np.mean(info[k]) for info in per_path_infos]
            for group_prefix, values in (
                ('{}/sum/'.format(info_key), per_path_sums),
                ('{}/average/'.format(info_key), per_path_means),
            ):
                stats.update(
                    create_stats_ordered_dict(
                        stat_prefix + k,
                        values,
                        stat_prefix=group_prefix,
                    ))

    return stats