from typing import Tuple

import numpy as np
from torch import Tensor


def forward(self, obs, skill):
    # Attach the active DIAYN skill to the observation so the
    # skill-conditioned forward policy can select an action.
    obs = {
        "observations": torchify(obs, self.device),
        "diayn": torchify(skill, self.device),
    }
    act = self.forward_policy.action(obs, deterministic=True)
    return untorchify(act)
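These snippets all lean on a pair of torchify/untorchify converters. A minimal sketch of what they might look like, assuming they recursively map numpy data onto the device as tensors and back (the names mirror the calls above; the bodies are illustrative, not the repo's actual implementation):

import numpy as np
import torch


def torchify(x, device):
    # Sketch: recursively move numpy arrays (or nested dicts of them)
    # onto the given device; the real helper may choose dtypes differently.
    if isinstance(x, dict):
        return {k: torchify(v, device) for k, v in x.items()}
    return torch.as_tensor(np.asarray(x), dtype=torch.float32, device=device)


def untorchify(x):
    # Sketch: inverse of torchify, detaching tensors back to numpy and
    # descending into dicts (needed for the **untorchify({...}) calls below).
    if isinstance(x, dict):
        return {k: untorchify(v) for k, v in x.items()}
    if isinstance(x, torch.Tensor):
        return x.detach().cpu().numpy()
    return x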
def add(self, batch):
    # Flatten any nested observation/action structures, convert the
    # tensors back to numpy, and store the transition in the buffer.
    self.buffer.add(
        **untorchify({
            "obs": self.obs_flat(batch["obs"]),
            "act": self.act_flat(batch["act"]),
            "next_obs": self.obs_flat(batch["next_obs"]),
            "rew": batch["rew"],
            "done": batch["done"],
        })
    )
def step(self, action: Tensor) -> Tuple[Tensor, Tensor, Tensor, dict]:
    # Hand the action to the underlying env as numpy, then lift the
    # resulting transition back onto the training device as tensors.
    action = untorchify(action)
    next_obs, reward, done, info = self.env.step(action)
    return (
        torchify(next_obs, self.device),
        torchify(reward, self.device),
        torchify(done, self.device),
        info,
    )
def add(self, batch: dict) -> None:
    self.buffer.add(
        # HACK: store explicit indices since cpprb has no
        # sample_with_indices function.
        index=self.buffer.get_next_index() + np.arange(batch["rew"].shape[0]),
        **untorchify({
            "obs": self.obs_flat(batch["obs"]),
            "act": self.act_flat(batch["act"]),
            "next_obs": self.obs_flat(batch["next_obs"]),
            "rew": batch["rew"],
            "done": batch["done"],
        }),
    )
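For the index hack to work, the "index" field has to be declared in the buffer's env_dict so cpprb stores it and returns it with every sample. A hypothetical construction and usage (size, obs_dim, act_dim, and batch_size are placeholders, not values from the repo):

import numpy as np
from cpprb import ReplayBuffer

# Hypothetical setup: "index" is stored like any other field, so an
# ordinary sample() already carries the insertion indices that a
# sample_with_indices() would have returned.
buffer = ReplayBuffer(
    size,
    env_dict={
        "obs": {"shape": obs_dim},
        "act": {"shape": act_dim},
        "next_obs": {"shape": obs_dim},
        "rew": {},
        "done": {},
        "index": {},
    },
)
sample = buffer.sample(batch_size)
idx = sample["index"].astype(np.int64)  # recover each transition's index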
def backward(self, obs):
    # The backward policy is not skill-conditioned: it acts on the
    # raw observation alone.
    obs = {"observations": torchify(obs, self.device)}
    act = self.backward_policy.action(obs, deterministic=True)
    return untorchify(act)
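Together, forward and backward suggest a reset-free loop: run the skill-conditioned forward policy, then let the backward policy steer back toward the start. A purely illustrative sketch (agent, sample_skill, num_episodes, forward_steps, and backward_steps are hypothetical names, not part of the snippets above):

obs = env.reset()
for episode in range(num_episodes):
    skill = sample_skill()             # assumed: draws a DIAYN skill vector
    for _ in range(forward_steps):     # forward phase, conditioned on the skill
        obs, rew, done, info = env.step(agent.forward(obs, skill))
    for _ in range(backward_steps):    # backward phase, no skill input
        obs, rew, done, info = env.step(agent.backward(obs))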