def act(self, obs: rlt.FeatureData) -> rlt.ActorOutput:
    """
    Sample a random action for every row of the batch.

    The observation values are never read; only the size of the first
    dimension of ``obs.float_features`` is used, to decide how many
    samples to draw from ``self.dist``.

    Args:
        obs: Feature container whose ``float_features`` tensor must have
            at least two dimensions.

    Returns:
        An ``rlt.ActorOutput`` holding the sampled actions and the value of
        ``self.dist.log_prob`` evaluated on them (no reduction is applied
        here).
    """
    obs: torch.Tensor = obs.float_features
    assert obs.dim() >= 2, f"obs has shape {obs.shape} (dim < 2)"

    # One draw per batch row.
    num_draws = obs.shape[0]
    # pyre-fixme[6]: Expected `Union[torch.Size, torch.Tensor]` for 1st param
    #  but got `Tuple[int]`.
    sampled = self.dist.sample((num_draws,))
    sampled_log_prob = self.dist.log_prob(sampled)
    return rlt.ActorOutput(action=sampled, log_prob=sampled_log_prob)
def act(self, obs: rlt.FeatureData) -> rlt.ActorOutput:
    """
    Pick a discrete action at random for every row of the batch.

    The observation values are ignored; only the size of the first
    dimension of ``obs.float_features`` matters. Each row gets a
    categorical distribution built from equal weights over
    ``self.num_actions`` actions.

    Args:
        obs: Feature container whose ``float_features`` tensor must have
            at least two dimensions.

    Returns:
        An ``rlt.ActorOutput`` whose ``action`` is the one-hot encoding of
        the sampled action indices and whose ``log_prob`` is the float
        log-probability of those indices under the distribution.
    """
    obs: torch.Tensor = obs.float_features
    assert obs.dim() >= 2, f"obs has shape {obs.shape} (dim < 2)"

    # Equal weight for every action in every row.
    num_rows = obs.size(0)
    uniform = torch.distributions.Categorical(
        torch.ones((num_rows, self.num_actions))
    )

    # Draw the action indices, then expose them in one-hot form.
    chosen = uniform.sample()
    return rlt.ActorOutput(
        action=F.one_hot(chosen, self.num_actions),
        log_prob=uniform.log_prob(chosen).float(),
    )
def act(
    self, obs: rlt.FeatureData, possible_actions_mask: Optional[np.ndarray] = None
) -> rlt.ActorOutput:
    """
    Sample a random action for every row of the batch.

    The observation values are never read; only the size of the first
    dimension of ``obs.float_features`` is used, to decide how many
    samples to draw from ``self.dist``.

    Args:
        obs: Feature container whose ``float_features`` tensor must have
            at least two dimensions.
        possible_actions_mask: Accepted but not used by this method.

    Returns:
        An ``rlt.ActorOutput`` holding the sampled actions and their
        log-probability, obtained by summing ``self.dist.log_prob`` over
        dimension 1.
    """
    obs: torch.Tensor = obs.float_features
    assert obs.dim() >= 2, f"obs has shape {obs.shape} (dim < 2)"

    # One draw per batch row.
    num_draws = obs.shape[0]
    # pyre-fixme[6]: Expected `Union[torch.Size, torch.Tensor]` for 1st param
    #  but got `Tuple[int]`.
    sampled = self.dist.sample((num_draws,))

    # Per-coordinate log-probs are added up along the action dimension
    # (the coordinates are assumed to be i.i.d.).
    per_coordinate = self.dist.log_prob(sampled)
    return rlt.ActorOutput(action=sampled, log_prob=per_coordinate.sum(dim=1))
def act(
    self, obs: rlt.FeatureData, possible_actions_mask: Optional[np.ndarray] = None
) -> rlt.ActorOutput:
    """
    Pick a discrete action at random for a single observation.

    The observation values are ignored. A score of 1 is assigned to each of
    the ``self.num_actions`` actions, the scores are passed through
    ``apply_possible_actions_mask`` with ``invalid_score=0.0``, and an
    action index is sampled from a categorical distribution built from the
    resulting scores.

    Args:
        obs: Feature container whose ``float_features`` tensor must have
            at least two dimensions and a first dimension of size 1.
        possible_actions_mask: Optional mask forwarded to
            ``apply_possible_actions_mask``.
            NOTE(review): presumably marks which actions are allowed, so
            that disallowed ones receive a score of 0 -- confirm against
            the helper.

    Returns:
        An ``rlt.ActorOutput`` whose ``action`` is the one-hot encoding of
        the sampled action index and whose ``log_prob`` is the float
        log-probability of that index under the distribution.
    """
    obs: torch.Tensor = obs.float_features
    assert obs.dim() >= 2, f"obs has shape {obs.shape} (dim < 2)"
    assert obs.shape[0] == 1, f"obs has shape {obs.shape} (0th dim != 1)"

    # Start from equal scores, then let the helper apply the mask.
    num_rows = obs.size(0)
    masked_scores = apply_possible_actions_mask(
        torch.ones((num_rows, self.num_actions)),
        possible_actions_mask,
        invalid_score=0.0,
    )

    # Draw the action index, then expose it in one-hot form.
    sampler = torch.distributions.Categorical(masked_scores)
    chosen = sampler.sample()
    return rlt.ActorOutput(
        action=F.one_hot(chosen, self.num_actions),
        log_prob=sampler.log_prob(chosen).float(),
    )