def forward(self, obs, skill):
    # Condition the forward policy on both the observation and the skill
    # (keyed as "diayn"), then return the deterministic action as numpy.
    obs = {
        "observations": torchify(obs, self.device),
        "diayn": torchify(skill, self.device),
    }
    act = self.forward_policy.action(obs, deterministic=True)
    return untorchify(act)
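All of these snippets lean on a pair of conversion helpers that the page itself never shows. Below is a minimal sketch of what they plausibly look like, inferred only from the call sites (torchify takes a value plus a target device, and untorchify is applied to single tensors as well as dicts of tensors); it is an assumption, not the original source:

import numpy as np
import torch

def torchify(x, device):
    # Recursively convert numpy data (or dicts of it) to tensors on `device`.
    if isinstance(x, dict):
        return {k: torchify(v, device) for k, v in x.items()}
    return torch.as_tensor(np.asarray(x), device=device)

def untorchify(x):
    # Recursively move tensors (or dicts of them) back to CPU numpy arrays.
    if isinstance(x, dict):
        return {k: untorchify(v) for k, v in x.items()}
    return x.detach().cpu().numpy()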
Example #2
def add(self, batch):
    # Flatten structured observations/actions, convert the batch back to
    # numpy, and store it in the replay buffer.
    self.buffer.add(
        **untorchify({
            "obs": self.obs_flat(batch["obs"]),
            "act": self.act_flat(batch["act"]),
            "next_obs": self.obs_flat(batch["next_obs"]),
            "rew": batch["rew"],
            "done": batch["done"],
        }))
Example #3
def step(self, action: Tensor) -> Tuple[Tensor, Tensor, Tensor, dict]:
    # Step the wrapped env on a numpy action, then torchify the results.
    action = untorchify(action)
    next_obs, reward, done, info = self.env.step(action)
    return (
        torchify(next_obs, self.device),
        torchify(reward, self.device),
        torchify(done, self.device),
        info,
    )
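To make the pattern above concrete, here is a self-contained sketch of a wrapper class built around the same step() logic, reusing the torchify/untorchify sketch from earlier. The class name TorchEnv and the environment id are illustrative assumptions, and the classic 4-tuple gym API is assumed to match the example:

import gym
import torch

class TorchEnv:
    # Hypothetical wrapper mirroring the step() method shown in Example #3.
    def __init__(self, env, device):
        self.env = env
        self.device = device

    def reset(self):
        return torchify(self.env.reset(), self.device)

    def step(self, action):
        next_obs, reward, done, info = self.env.step(untorchify(action))
        return (
            torchify(next_obs, self.device),
            torchify(reward, self.device),
            torchify(done, self.device),
            info,
        )

env = TorchEnv(gym.make("Pendulum-v1"), torch.device("cpu"))
obs = env.reset()  # tensor out
next_obs, rew, done, info = env.step(torch.as_tensor(env.env.action_space.sample()))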
Example #4
def add(self, batch: dict) -> None:
    self.buffer.add(
        # HACK: Add in the indices ourselves, since cpprb doesn't have a
        # sample_with_indices function.
        index=self.buffer.get_next_index()
        + np.arange(batch["rew"].shape[0]),
        **untorchify({
            "obs": self.obs_flat(batch["obs"]),
            "act": self.act_flat(batch["act"]),
            "next_obs": self.obs_flat(batch["next_obs"]),
            "rew": batch["rew"],
            "done": batch["done"],
        }),
    )

def backward(self, obs):
    # Like forward(), but the backward policy sees only the observation.
    obs = {"observations": torchify(obs, self.device)}
    act = self.backward_policy.action(obs, deterministic=True)
    return untorchify(act)