def get_stochastic_action(self, x):
    """Sample one action for a single observation batch.

    Runs ``self.forward`` on ``x`` to obtain two Q outputs, turns them into
    a probability tensor with ``self.get_mean_distribution_from_Qs`` and
    draws one sample from the resulting categorical distribution.

    Args:
        x: input tensor; must be 2-dimensional (asserted).

    Returns:
        The sampled action as a NumPy scalar (element 0 of the sample).

    Raises:
        AssertionError: if ``x`` is not 2-D or the sample does not have
            shape ``(1,)``.
    """
    # Sampling only; no gradients are needed.
    with torch.no_grad():
        assert len(x.shape) == 2
        q_first, q_second = self.forward(x)
        action_probs = self.get_mean_distribution_from_Qs(q_first, q_second)
        sampler = Categorical(action_probs)
        sampled = sampler.sample()
        # Exactly one action is expected to come out of the sampler.
        assert sampled.shape == (1, )
        as_numpy = sampled.detach().cpu().numpy()
        return as_numpy[0]
def select_actions(pi, dist_type, env_type):
    """Sample actions from a policy output.

    Args:
        pi: policy output. When ``env_type`` is ``'atari'`` it is passed
            directly to ``Categorical``; otherwise it is unpacked as a pair
            ``(mean, std)`` for ``'gauss'`` or ``(alpha, beta)`` for
            ``'beta'``.
        dist_type: ``'gauss'`` or ``'beta'``; only consulted when
            ``env_type`` is not ``'atari'``.
        env_type: ``'atari'`` selects the categorical branch; any other
            value selects the continuous branch chosen by ``dist_type``.

    Returns:
        The sampled actions as a NumPy array with singleton dimensions
        squeezed out.

    Raises:
        ValueError: if ``env_type`` is not ``'atari'`` and ``dist_type`` is
            neither ``'gauss'`` nor ``'beta'``.
    """
    if env_type == 'atari':
        actions = Categorical(pi).sample()
    elif dist_type == 'gauss':
        mean, std = pi
        actions = Normal(mean, std).sample()
    elif dist_type == 'beta':
        alpha, beta = pi
        # The Beta parameters are detached and moved to CPU before sampling.
        actions = Beta(alpha.detach().cpu(), beta.detach().cpu()).sample()
    else:
        # Previously this fell through and failed with an UnboundLocalError
        # on the return line; fail with an explicit message instead.
        raise ValueError(
            "unsupported dist_type {!r} for env_type {!r}; "
            "expected 'gauss' or 'beta'".format(dist_type, env_type)
        )
    return actions.detach().cpu().numpy().squeeze()
def select_actions(pi):
    """Draw actions from a categorical policy.

    Args:
        pi: probabilities handed to ``Categorical``.

    Returns:
        The sampled action indices as a NumPy array with singleton
        dimensions squeezed out.
    """
    policy = Categorical(pi)
    sampled = policy.sample()
    as_numpy = sampled.detach().cpu().numpy()
    return as_numpy.squeeze()