def __init__(self, obs_dim, act_dim, size, info_shapes, gamma=0.99, lam=0.95):
    """
    Set up zero-filled float32 storage and bookkeeping for the buffer.

    Properties created:
        Environment: observations, actions, rewards, total expected rewards
        Computed: advantages, values, logps, infos
        Training: gamma, lam
        Store: write pointer, path start index, max size of store
        Sorted list of the info keys

    Args:
        obs_dim: observation dimension, forwarded to ``core.combined_shape``
            together with ``size`` to get the observation buffer shape.
        act_dim: action dimension, handled the same way as ``obs_dim``.
        size: leading dimension of every buffer; also kept as ``max_size``.
        info_shapes: mapping from info key to the per-entry shape (any
            iterable of ints) of that key's buffer.
        gamma: stored as ``self.gamma``; not otherwise used here.
        lam: stored as ``self.lam``; not otherwise used here.
    """

    def alloc(shape):
        # Every buffer shares the same dtype, so allocate through one place.
        return np.zeros(shape, dtype=np.float32)

    # Buffers whose shape depends on the observation / action dimensions.
    self.obs_buf = alloc(core.combined_shape(size, obs_dim))
    self.act_buf = alloc(core.combined_shape(size, act_dim))

    # One scalar slot per stored step.
    self.adv_buf = alloc(size)
    self.rew_buf = alloc(size)
    self.ret_buf = alloc(size)
    self.val_buf = alloc(size)
    self.logp_buf = alloc(size)

    # One buffer per info key, shaped (size, *info_shape).
    info_storage = {}
    for key, entry_shape in info_shapes.items():
        info_storage[key] = alloc([size] + list(entry_shape))
    self.info_bufs = info_storage
    self.sorted_info_keys = core.keys_as_sorted_list(self.info_bufs)

    # Training parameters.
    self.gamma = gamma
    self.lam = lam

    # Bookkeeping: both indices start at the beginning of the storage.
    self.ptr = 0
    self.path_start_idx = 0
    self.max_size = size
def __init__(self, obs_dim, act_dim, size, info_shapes, gamma=0.99, lam=0.95):
    """
    Allocate the buffer's arrays and reset its counters.

    All arrays are zero-initialised ``np.float32`` with ``size`` as their
    leading dimension (the observation and action shapes come from
    ``core.combined_shape``; presumably ``(size, *dim)`` -- confirm
    against that helper).

    Args:
        obs_dim: passed with ``size`` to ``core.combined_shape`` for the
            shape of ``obs_buf``.
        act_dim: passed with ``size`` to ``core.combined_shape`` for the
            shape of ``act_buf``.
        size: number of slots in each buffer; stored as ``max_size``.
        info_shapes: dict mapping each info key to an iterable shape; one
            array of shape ``[size] + list(shape)`` is created per key.
        gamma: saved on the instance as ``gamma``.
        lam: saved on the instance as ``lam``.
    """
    dtype = np.float32

    obs_shape = core.combined_shape(size, obs_dim)
    self.obs_buf = np.zeros(obs_shape, dtype=dtype)
    act_shape = core.combined_shape(size, act_dim)
    self.act_buf = np.zeros(act_shape, dtype=dtype)

    # Flat, length-`size` arrays.
    self.adv_buf = np.zeros(size, dtype=dtype)
    self.rew_buf = np.zeros(size, dtype=dtype)
    self.ret_buf = np.zeros(size, dtype=dtype)
    self.val_buf = np.zeros(size, dtype=dtype)
    self.logp_buf = np.zeros(size, dtype=dtype)

    # Per-key info arrays, plus the key ordering produced by the core helper.
    self.info_bufs = {
        name: np.zeros([size, *shape], dtype=dtype)
        for name, shape in info_shapes.items()
    }
    self.sorted_info_keys = core.keys_as_sorted_list(self.info_bufs)

    self.gamma = gamma
    self.lam = lam

    # Write position and start of the current path both begin at slot 0.
    self.ptr = self.path_start_idx = 0
    self.max_size = size