예제 #1
0
 def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95):
     """Pre-allocate zero-filled float32 storage for ``size`` entries.

     Args:
         obs_dim: Passed to ``core.combined_shape`` together with ``size``
             to obtain the shape of the observation buffer.
         act_dim: Passed to ``core.combined_shape`` together with ``size``
             to obtain the shape of the action buffer.
         size: Length of every one-dimensional buffer; also stored as
             ``max_size``.
         gamma: Stored unchanged on the instance (presumably the discount
             factor -- confirm against the methods that read it).
         lam: Stored unchanged on the instance (presumably the GAE lambda
             -- confirm against the methods that read it).
     """

     def _zeros(shape):
         # Every buffer in this class shares the same dtype.
         return np.zeros(shape, dtype=np.float32)

     # Buffers whose shape is derived from the observation/action dims.
     self.obs_buf = _zeros(core.combined_shape(size, obs_dim))
     self.act_buf = _zeros(core.combined_shape(size, act_dim))

     # Flat buffers of length ``size``.
     self.adv_buf = _zeros(size)
     self.rew_buf = _zeros(size)
     self.ret_buf = _zeros(size)
     self.val_buf = _zeros(size)
     self.logp_buf = _zeros(size)

     self.gamma = gamma
     self.lam = lam

     # Both cursors start at 0; capacity is remembered as max_size.
     self.ptr = 0
     self.path_start_idx = 0
     self.max_size = size
예제 #2
0
파일: vpg.py 프로젝트: Tubbz-alt/TEAC
 def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95):
     """Allocate zero-filled float32 buffers and pin the device to CPU.

     Args:
         obs_dim: Combined with ``size`` via ``core.combined_shape`` to
             size the observation buffer.
         act_dim: Combined with ``size`` via ``core.combined_shape`` to
             size the action buffer.
         size: Length of each flat buffer; also stored as ``max_size``.
         gamma: Stored unchanged on the instance (presumably the discount
             factor -- confirm against the methods that read it).
         lam: Stored unchanged on the instance (presumably the GAE lambda
             -- confirm against the methods that read it).
     """
     self.obs_buf = np.zeros(core.combined_shape(size, obs_dim),
                             dtype=np.float32)
     self.act_buf = np.zeros(core.combined_shape(size, act_dim),
                             dtype=np.float32)
     self.adv_buf = np.zeros(size, dtype=np.float32)
     self.rew_buf = np.zeros(size, dtype=np.float32)
     self.ret_buf = np.zeros(size, dtype=np.float32)
     self.val_buf = np.zeros(size, dtype=np.float32)
     self.logp_buf = np.zeros(size, dtype=np.float32)
     self.gamma, self.lam = gamma, lam
     # The device is always CPU. A CUDA-if-available assignment used to sit
     # directly above this line but was unconditionally overwritten, so it
     # never took effect; it was removed to stop it implying GPU placement.
     self.device = torch.device("cpu")
     self.ptr, self.path_start_idx, self.max_size = 0, 0, size
예제 #3
0
파일: vpg.py 프로젝트: rashmi-iyer/RISE
    def __init__(self,
                 obs_dim,
                 act_dim,
                 size,
                 gamma=0.99,
                 lam=0.95,
                 hierstep=3):
        """Pre-allocate zero-filled float32 buffers for two sets of data.

        One set uses the plain names (``obs_buf``, ``act_buf``, ...) and the
        other uses the ``hi``-suffixed names (``obshi_buf``, ``acthi_buf``,
        ...). Each set has its own write pointer and path-start index.

        Args:
            obs_dim: Combined with ``size`` via ``core.combined_shape`` for
                the observation-shaped buffers.
            act_dim: Combined with ``size`` via ``core.combined_shape`` for
                the action-shaped buffers.
            size: Length of each flat buffer; also stored as ``max_size``.
            gamma: Stored unchanged on the instance.
            lam: Stored unchanged on the instance.
            hierstep: Stored unchanged on the instance.
        """

        def _zeros(shape):
            # All buffers share one dtype.
            return np.zeros(shape, dtype=np.float32)

        def _obs_shaped():
            return _zeros(core.combined_shape(size, obs_dim))

        def _act_shaped():
            return _zeros(core.combined_shape(size, act_dim))

        # --- plain (non-"hi") buffers ---
        self.obs_buf = _obs_shaped()
        self.init_obs_buf = _obs_shaped()
        self.goal_buf = _obs_shaped()
        self.count_buf = _act_shaped()
        self.act_buf = _act_shaped()
        self.adv_buf = _zeros(size)
        self.rew_buf = _zeros(size)
        self.ret_buf = _zeros(size)
        self.val_buf = _zeros(size)
        self.logp_buf = _zeros(size)

        # --- "hi" buffers ---
        self.obshi_buf = _obs_shaped()
        # NOTE(review): acthi_buf is sized with obs_dim, not act_dim.
        # Presumably deliberate (same shape as goal_buf) -- confirm.
        self.acthi_buf = _obs_shaped()
        self.advhi_buf = _zeros(size)
        self.rewhi_buf = _zeros(size)
        self.rethi_buf = _zeros(size)
        self.valhi_buf = _zeros(size)
        self.logphi_buf = _zeros(size)

        self.gamma = gamma
        self.lam = lam
        self.hierstep = hierstep

        # Cursors for the plain buffers, plus the shared capacity.
        self.ptr = 0
        self.path_start_idx = 0
        self.max_size = size

        # Independent cursors for the "hi" buffers.
        self.ptrhi = 0
        self.path_start_idxhi = 0