Ejemplo n.º 1
0
    def __init__(self,
                 obs_dim,
                 act_dim,
                 size,
                 info_shapes,
                 gamma=0.99,
                 lam=0.95):
        """
        Allocate zero-filled float32 storage and reset the bookkeeping.

        Args:
            obs_dim: Forwarded with ``size`` to ``core.combined_shape`` to
                obtain the shape of the observation buffer.
            act_dim: Forwarded with ``size`` to ``core.combined_shape`` to
                obtain the shape of the action buffer.
            size: Length of the advantage, reward, return, value and logp
                buffers; also kept as ``self.max_size``.
            info_shapes: Mapping from info key to an iterable of trailing
                dimensions. One array of shape ``[size, *dims]`` is
                allocated per key.
            gamma: Stored unchanged as ``self.gamma`` (presumably the
                discount factor -- confirm against the code that reads it).
            lam: Stored unchanged as ``self.lam`` (presumably the GAE
                lambda -- confirm against the code that reads it).
        """

        def zeros(shape):
            # All buffers share one dtype; keep it in a single place.
            return np.zeros(shape, dtype=np.float32)

        # Buffers whose shape is decided by the core helper.
        self.obs_buf = zeros(core.combined_shape(size, obs_dim))
        self.act_buf = zeros(core.combined_shape(size, act_dim))

        # One-dimensional buffers of length ``size``.
        self.adv_buf = zeros(size)
        self.rew_buf = zeros(size)
        self.ret_buf = zeros(size)
        self.val_buf = zeros(size)
        self.logp_buf = zeros(size)

        # One buffer per info key, plus the key order produced by the
        # core helper.
        self.info_bufs = {
            key: zeros([size, *dims]) for key, dims in info_shapes.items()
        }
        self.sorted_info_keys = core.keys_as_sorted_list(self.info_bufs)

        self.gamma = gamma
        self.lam = lam

        # Both indices start at zero; ``max_size`` mirrors ``size``.
        self.ptr = 0
        self.path_start_idx = 0
        self.max_size = size
Ejemplo n.º 2
0
 def __init__(self,
              obs_dim,
              act_dim,
              size,
              info_shapes,
              gamma=0.99,
              lam=0.95):
     """Create the zeroed float32 buffers and initial cursor state.

     ``obs_dim`` and ``act_dim`` are combined with ``size`` through
     ``core.combined_shape`` to shape the observation and action
     buffers. ``info_shapes`` maps each info key to an iterable of
     trailing dimensions, giving one ``[size] + dims`` array per key.
     ``gamma`` and ``lam`` are stored as given, and ``size`` is kept
     as ``max_size``.
     """
     f32 = np.float32

     obs_shape = core.combined_shape(size, obs_dim)
     self.obs_buf = np.zeros(obs_shape, dtype=f32)
     act_shape = core.combined_shape(size, act_dim)
     self.act_buf = np.zeros(act_shape, dtype=f32)

     # Each scalar buffer needs its own array, so allocate them
     # separately rather than chaining one assignment.
     self.adv_buf = np.zeros(size, dtype=f32)
     self.rew_buf = np.zeros(size, dtype=f32)
     self.ret_buf = np.zeros(size, dtype=f32)
     self.val_buf = np.zeros(size, dtype=f32)
     self.logp_buf = np.zeros(size, dtype=f32)

     info_bufs = {}
     for key, dims in info_shapes.items():
         info_bufs[key] = np.zeros([size] + list(dims), dtype=f32)
     self.info_bufs = info_bufs
     self.sorted_info_keys = core.keys_as_sorted_list(info_bufs)

     self.gamma = gamma
     self.lam = lam

     self.ptr = 0
     self.path_start_idx = 0
     self.max_size = size