def truncate_paths(paths, max_samples): """ Truncate the list of paths so that the total number of samples is exactly equal to max_samples. This is done by removing extra paths at the end of the list, and make the last path shorter if necessary :param paths: a list of paths :param max_samples: the absolute maximum number of samples :return: a list of paths, truncated so that the number of samples adds up to max-samples """ # chop samples collected by extra paths # make a copy paths = list(paths) total_n_samples = sum(len(path["rewards"]) for path in paths) while len(paths) > 0 and total_n_samples - len( paths[-1]["rewards"]) >= max_samples: total_n_samples -= len(paths.pop(-1)["rewards"]) if len(paths) > 0: last_path = paths.pop(-1) truncated_last_path = dict() truncated_len = len( last_path["rewards"]) - (total_n_samples - max_samples) for k, v in last_path.items(): if k in ["observations", "actions", "rewards"]: truncated_last_path[k] = tensor_utils.truncate_tensor_list( v, truncated_len) elif k in ["env_infos", "agent_infos"]: truncated_last_path[k] = tensor_utils.truncate_tensor_dict( v, truncated_len) else: raise NotImplementedError paths.append(truncated_last_path) return paths
def truncate_paths(paths, max_samples): """ Truncate the list of paths so that the total number of samples is exactly equal to max_samples. This is done by removing extra paths at the end of the list, and make the last path shorter if necessary :param paths: a list of paths :param max_samples: the absolute maximum number of samples :return: a list of paths, truncated so that the number of samples adds up to max-samples """ # chop samples collected by extra paths # make a copy paths = list(paths) total_n_samples = sum(len(path["rewards"]) for path in paths) while len(paths) > 0 and total_n_samples - len(paths[-1]["rewards"]) >= max_samples: total_n_samples -= len(paths.pop(-1)["rewards"]) if len(paths) > 0: last_path = paths.pop(-1) truncated_last_path = dict() truncated_len = len( last_path["rewards"]) - (total_n_samples - max_samples) for k, v in last_path.iteritems(): if k in ["observations", "actions", "rewards"]: truncated_last_path[k] = tensor_utils.truncate_tensor_list( v, truncated_len) elif k in ["env_infos", "agent_infos"]: truncated_last_path[k] = tensor_utils.truncate_tensor_dict( v, truncated_len) else: raise NotImplementedError paths.append(truncated_last_path) return paths