Esempio n. 1
0
 def __init__(
     self,
     size,
     obs_shape,
     act_shape,
     pi_info_shapes,
     gamma=0.99,
     lam=0.95,
     cost_gamma=0.99,
     cost_lam=0.95,
 ):
     """Allocate zero-filled float32 buffers and record hyperparameters.

     Args:
         size: Number of slots in every buffer; also stored as
             ``self.max_size``.
         obs_shape: Per-entry observation shape, combined with ``size``
             through ``combined_shape``.
         act_shape: Per-entry action shape, combined with ``size``
             through ``combined_shape``.
         pi_info_shapes: Mapping from key to an iterable of dimensions;
             one buffer of shape ``[size] + list(dims)`` is created per key.
         gamma: Stored as ``self.gamma`` (not otherwise used here).
         lam: Stored as ``self.lam`` (not otherwise used here).
         cost_gamma: Stored as ``self.cost_gamma`` (not otherwise used here).
         cost_lam: Stored as ``self.cost_lam`` (not otherwise used here).

     NOTE(review): gamma/lam look like discount and GAE-lambda
     coefficients for the reward and cost signals -- confirm against the
     methods that consume them.
     """

     def _zeros(shape):
         # Every buffer in this object shares the same dtype.
         return np.zeros(shape, dtype=np.float32)

     # Buffers whose per-entry shape is supplied by the caller.
     self.obs_buf = _zeros(combined_shape(size, obs_shape))
     self.act_buf = _zeros(combined_shape(size, act_shape))

     # One scalar slot per entry.
     self.adv_buf = _zeros(size)
     self.rew_buf = _zeros(size)
     self.ret_buf = _zeros(size)
     self.val_buf = _zeros(size)

     # Cost counterparts of the four buffers above.
     self.cadv_buf = _zeros(size)  # cost advantage
     self.cost_buf = _zeros(size)  # costs
     self.cret_buf = _zeros(size)  # cost return
     self.cval_buf = _zeros(size)  # cost value

     self.logp_buf = _zeros(size)

     # One buffer per policy-info key, with `size` as the leading dimension.
     info_buffers = {}
     for key, dims in pi_info_shapes.items():
         info_buffers[key] = _zeros([size] + list(dims))
     self.pi_info_bufs = info_buffers
     self.sorted_pi_info_keys = keys_as_sorted_list(self.pi_info_bufs)

     # Hyperparameters are only recorded here.
     self.gamma = gamma
     self.lam = lam
     self.cost_gamma = cost_gamma
     self.cost_lam = cost_lam

     # Write cursor, start index of the current path, and capacity.
     self.ptr = 0
     self.path_start_idx = 0
     self.max_size = size
Esempio n. 2
0
def placeholder(dim=None):
    """Return a float32 ``tf.placeholder`` shaped by ``combined_shape(None, dim)``.

    Args:
        dim: Forwarded as the second argument of ``combined_shape``;
            defaults to ``None``.

    NOTE(review): ``tf.placeholder`` is the TensorFlow 1.x graph-mode API;
    confirm the TensorFlow version this file targets.
    """
    # `None` is passed as the first argument to combined_shape; presumably
    # it marks an unspecified leading (batch) dimension -- verify against
    # combined_shape's definition.
    shape = combined_shape(None, dim)
    return tf.placeholder(dtype=tf.float32, shape=shape)