Esempio n. 1
0
    def get_stochastic_action(self, x):
        """Sample one action index from the policy for a single input.

        ``x`` must be two-dimensional.  The result of ``self.forward(x)`` is
        unpacked into two values, which ``get_mean_distribution_from_Qs``
        turns into the probabilities of a ``Categorical`` distribution.
        The sampled tensor must have shape ``(1,)``, so in practice ``x`` is
        expected to hold exactly one row (presumably ``(1, obs_dim)`` --
        confirm against callers).

        Returns:
            Element 0 of the sampled tensor after conversion to a NumPy
            array, i.e. a NumPy integer scalar.
        """
        with torch.no_grad():
            n_dims = len(x.shape)
            assert n_dims == 2

            first_q, second_q = self.forward(x)
            action_probs = self.get_mean_distribution_from_Qs(first_q, second_q)

            policy = Categorical(action_probs)
            sampled = policy.sample()
            # Only a single sampled action is supported here.
            assert sampled.shape == (1, )

            as_numpy = sampled.detach().cpu().numpy()
            return as_numpy[0]
Esempio n. 2
0
def select_actions(pi, dist_type, env_type):
    """Sample actions from the policy output and return them as a NumPy array.

    Args:
        pi: Policy output.  When ``env_type == 'atari'`` it is passed
            directly to ``Categorical``.  Otherwise it must unpack into two
            tensors: ``(mean, std)`` for ``dist_type == 'gauss'`` or
            ``(alpha, beta)`` for ``dist_type == 'beta'``.
        dist_type: ``'gauss'`` or ``'beta'``.  Ignored when ``env_type`` is
            ``'atari'``.
        env_type: ``'atari'`` selects categorical sampling; any other value
            selects the continuous distribution named by ``dist_type``.

    Returns:
        The sampled actions as a NumPy array with size-1 dimensions
        squeezed out.

    Raises:
        ValueError: If ``env_type`` is not ``'atari'`` and ``dist_type`` is
            neither ``'gauss'`` nor ``'beta'``.
    """
    if env_type == 'atari':
        actions = Categorical(pi).sample()
    elif dist_type == 'gauss':
        mean, std = pi
        actions = Normal(mean, std).sample()
    elif dist_type == 'beta':
        alpha, beta = pi
        # The Beta parameters are detached and moved to the CPU before the
        # distribution is built, so sampling happens on the CPU.
        actions = Beta(alpha.detach().cpu(), beta.detach().cpu()).sample()
    else:
        # Previously this fell through and failed with UnboundLocalError at
        # the return statement; fail with a descriptive error instead.
        raise ValueError(
            "unsupported dist_type {!r}; expected 'gauss' or 'beta'".format(dist_type)
        )
    return actions.detach().cpu().numpy().squeeze()
Esempio n. 3
0
def select_actions(pi):
    """Draw actions from a categorical policy and return them as NumPy.

    ``pi`` is handed to ``Categorical`` as its first argument (the
    probabilities).  The sample is detached, moved to the CPU, converted to
    a NumPy array, and stripped of size-1 dimensions.
    """
    policy = Categorical(pi)
    sampled = policy.sample()
    as_array = sampled.detach().cpu().numpy()
    return as_array.squeeze()