Ejemplo n.º 1
0
    def convert_to_tensor(self, arr) -> Tensor:
        """Build a PyTorch tensor from `arr`, placed on this policy's device.

        Thin wrapper: forwards `arr` and `self.device` to the free-standing
        `convert_to_tensor` helper (the bare name resolves outside the class,
        not to this method).

        Args:
            arr (array_like): object which can be converted using `np.asarray`

        Returns:
            Whatever the helper returns for `arr` and `self.device`.
        """
        target_device = self.device
        return convert_to_tensor(arr, target_device)
Ejemplo n.º 2
0
 def get_exploration_action(
     self, *, action_distribution, timestep, explore=True
 ):
     """Pick actions for a batch, sampling from the action space if exploring.

     Args:
         action_distribution: object exposing `inputs["obs"]` (the batched
             observations) and `deterministic_sample()`.
         timestep: not used by this implementation.
         explore: when falsy, defer to the distribution's deterministic
             sample instead of drawing from the action space.

     Returns:
         When exploring, a pair `(acts, logp)` of tensors created on the
         observations' device; otherwise whatever
         `action_distribution.deterministic_sample()` returns.
     """
     # pylint:disable=unused-argument
     if not explore:
         return action_distribution.deterministic_sample()

     obs = action_distribution.inputs["obs"]
     batch_size = obs.size(0)
     space = self.action_space

     # One independent draw from the action space per observation in the batch.
     sampled = [space.sample() for _ in range(batch_size)]
     acts = ptu.convert_to_tensor(sampled, obs.device)

     # Negative sum of log side lengths of the [low, high] box: the log-density
     # of a uniform distribution over it (presumably how `sample()` draws --
     # confirm against the action space type). Identical for every row.
     uniform_logp = -np.log(space.high - space.low).sum(axis=-1)
     logp = ptu.convert_to_tensor([uniform_logp] * batch_size, obs.device)

     return acts, logp
Ejemplo n.º 3
0
def get_actor_outputs(module, row, n_samples):
    """Collect sampled/deterministic actions and entropy statistics for `row`.

    Args:
        module: actor exposing `sample`, `deterministic`, `log_prob` and
            `parameters`.
        row: mapping holding the current observations under
            `SampleBatch.CUR_OBS`.
        n_samples: number of action samples requested from `module.sample`.

    Returns:
        dict with keys "acts", "logp", "det", "log_prob", "entropy" and
        "nll_grad".
    """
    observations = ptu.convert_to_tensor(row[SampleBatch.CUR_OBS], "cpu")
    sample_shape = (n_samples,)

    # Sampling and the deterministic action are computed without autograd.
    with torch.no_grad():
        sampled_acts, sampled_logp = module.sample(observations, sample_shape)
        det_acts, _ = module.deterministic(observations)
        # In-place: adds a leading dimension to the deterministic actions.
        det_acts.unsqueeze_(0)

    # Re-evaluated outside `no_grad` so the entropy estimate can be
    # differentiated with respect to the module parameters.
    sample_log_prob = module.log_prob(observations, sampled_acts)
    entropy_estimate = -sample_log_prob.mean()
    grad = ptu.flat_grad(entropy_estimate, module.parameters())

    return dict(
        acts=sampled_acts,
        logp=sampled_logp,
        det=det_acts,
        log_prob=sample_log_prob.detach(),
        entropy=entropy_estimate.detach(),
        nll_grad=grad,
    )
Ejemplo n.º 4
0
 def stat_to_tensor_dict(self, info: StatDict) -> TensorDict:
     """Map every value in `info` through `convert_to_tensor` on `self.device`.

     Args:
         info: mapping whose values are passed to `convert_to_tensor`.

     Returns:
         A new dict with the same keys, in the same order, holding the
         converted values.
     """
     tensors = {}
     for name, stat in info.items():
         tensors[name] = convert_to_tensor(stat, self.device)
     return tensors
Ejemplo n.º 5
0
def get_model_samples(module, row, n_samples):
    """Draw next-observation samples from `module` for the transitions in `row`.

    Args:
        module: model exposing `sample(obs, act, sample_shape)`, which returns
            a pair whose first element is the sampled next observations.
        row: mapping holding observations under `SampleBatch.CUR_OBS` and
            actions under `SampleBatch.ACTIONS`.
        n_samples: number of samples requested from `module.sample`.

    Returns:
        The sampled next observations, detached and converted with `.numpy()`.
    """
    observations = ptu.convert_to_tensor(row[SampleBatch.CUR_OBS], "cpu")
    actions = ptu.convert_to_tensor(row[SampleBatch.ACTIONS], "cpu")
    sample_shape = (n_samples,)

    # The second element of the returned pair is not needed here.
    next_obs, _ = module.sample(observations, actions, sample_shape)
    detached = next_obs.detach()
    return detached.numpy()