예제 #1
0
    def get_stochastic_action(self, x):
        """Draw one action by sampling from the policy's categorical distribution.

        The two outputs of ``self.forward(x)`` are combined into a probability
        tensor by ``self.get_mean_distribution_from_Qs``; a single categorical
        sample is then drawn with gradient tracking disabled.

        Args:
            x: Input whose shape has exactly two dimensions (asserted).
                NOTE(review): presumably a single observation shaped
                (1, obs_dim), since the sample must have shape (1,) — confirm.

        Returns:
            The sampled action as a NumPy scalar (first element of the
            one-element sample).
        """
        n_dims = len(x.shape)
        assert n_dims == 2

        with torch.no_grad():
            q_first, q_second = self.forward(x)
            action_probs = self.get_mean_distribution_from_Qs(q_first, q_second)
            sampled = Categorical(action_probs).sample()

        # Exactly one action is expected; anything else is a caller error.
        assert sampled.shape == (1, )

        sampled_np = sampled.detach().cpu().numpy()
        return sampled_np[0]
예제 #2
0
파일: utils.py 프로젝트: DAIM-2020/DAIM
def select_actions(pi, dist_type, env_type):
    """Sample actions from the policy output and return them as a NumPy array.

    Args:
        pi: Policy output. When ``env_type == 'atari'`` it is passed to
            ``Categorical`` as the probabilities. Otherwise it is unpacked as
            a ``(mean, std)`` pair for ``dist_type == 'gauss'`` or an
            ``(alpha, beta)`` pair for ``dist_type == 'beta'``.
        dist_type: ``'gauss'`` or ``'beta'``; ignored when ``env_type`` is
            ``'atari'``.
        env_type: ``'atari'`` selects categorical sampling; any other value
            selects the distribution named by ``dist_type``.

    Returns:
        The sampled actions as a ``numpy.ndarray`` with all size-1 dimensions
        squeezed out.

    Raises:
        ValueError: If ``env_type`` is not ``'atari'`` and ``dist_type`` is
            neither ``'gauss'`` nor ``'beta'``.
    """
    if env_type == 'atari':
        actions = Categorical(pi).sample()
    elif dist_type == 'gauss':
        mean, std = pi
        actions = Normal(mean, std).sample()
    elif dist_type == 'beta':
        alpha, beta = pi
        # The parameters are detached and moved to the CPU before sampling.
        # NOTE(review): presumably to avoid device-side Beta sampling — confirm.
        actions = Beta(alpha.detach().cpu(), beta.detach().cpu()).sample()
    else:
        # Fail loudly instead of hitting an unbound ``actions`` below.
        raise ValueError(
            "unsupported dist_type {!r}; expected 'gauss' or 'beta'".format(dist_type)
        )
    return actions.detach().cpu().numpy().squeeze()
예제 #3
0
def select_actions(pi):
    """Sample from ``Categorical(pi)`` and return the result as a NumPy array.

    Args:
        pi: Value passed to ``Categorical`` as the probabilities.

    Returns:
        The sampled actions as a ``numpy.ndarray`` with all size-1 dimensions
        squeezed out.
    """
    policy_dist = Categorical(pi)
    sampled = policy_dist.sample()
    host_array = sampled.detach().cpu().numpy()
    return host_array.squeeze()