def get_action(self, belief, state, det=False):
    actor_out = self.forward(belief, state)
    if self._dist == 'tanh_normal':
        # actor_out.size() == (N x (action_size * 2))
        # Inverse softplus of the initial std, computed once with NumPy;
        # this replaces the device-tensor workaround kept below for reference:
        # tmp = torch.tensor(self._init_std, device=actor_out.get_device())
        # raw_init_std = torch.log(torch.exp(tmp) - 1)
        raw_init_std = np.log(np.exp(self._init_std) - 1)
        action_mean, action_std_dev = torch.chunk(actor_out, 2, dim=1)
        # Soft-clip the mean into (-mean_scale, mean_scale) and keep the std positive
        action_mean = self._mean_scale * torch.tanh(action_mean / self._mean_scale)
        action_std = F.softplus(action_std_dev + raw_init_std) + self._min_std
        dist = Normal(action_mean, action_std)
        dist = TransformedDistribution(dist, TanhBijector())
        dist = torch.distributions.Independent(dist, 1)
        dist = SampleDist(dist)
    elif self._dist == 'onehot':
        # actor_out.size() == (N x action_size)
        # Squash logits into [0, 1]; unbounded values could yield NaN/Inf
        # probabilities and trigger "RuntimeError: CUDA error: device-side
        # assert triggered" inside Categorical sampling.
        actor_out = (torch.tanh(actor_out) + 1.0) * 0.5
        dist = Categorical(logits=actor_out)
        dist = OneHotDist(dist)
    else:
        raise NotImplementedError(self._dist)
    return dist.mode() if det else dist.sample()
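# A minimal, self-contained sketch of the tanh-Normal head built above, using
# stock torch.distributions: TanhTransform stands in for this repo's
# TanhBijector, and the log-prob argmax over samples approximates
# SampleDist.mode(). The hyperparameters (mean_scale=5, init_std=1,
# min_std=1e-4) are illustrative assumptions, not values from this repo.
import numpy as np
import torch
import torch.nn.functional as F
from torch.distributions import Independent, Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform

def tanh_normal(actor_out, mean_scale=5.0, init_std=1.0, min_std=1e-4):
    raw_init_std = np.log(np.exp(init_std) - 1)         # inverse softplus
    mean, std_dev = torch.chunk(actor_out, 2, dim=1)
    mean = mean_scale * torch.tanh(mean / mean_scale)   # soft-clip the mean
    std = F.softplus(std_dev + raw_init_std) + min_std  # keep the std positive
    return Independent(TransformedDistribution(Normal(mean, std), TanhTransform()), 1)

dist = tanh_normal(torch.randn(4, 2 * 6))               # N=4, action_size=6
action = dist.rsample()                                 # stochastic action in (-1, 1)
samples = dist.rsample((100,))                          # (100, 4, 6)
mode = samples[dist.log_prob(samples).argmax(dim=0), torch.arange(4)]  # ~SampleDist.mode()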
def get_action(self, belief, posterior_state, explore=False, det=False):
    # CEM planner; `explore` is accepted for interface compatibility but unused here.
    state = posterior_state
    B, H, Z = belief.size(0), belief.size(1), state.size(1)
    # Per-actor proposal means/stds used to seed the first optimisation iteration
    actions_l_mean_lists, actions_l_std_lists = self.get_action_sequence(belief, state, B)
    # Replicate belief/state once per candidate: (B, H) -> (B * candidates, H), etc.
    belief = belief.unsqueeze(dim=1).expand(B, self.candidates, H).reshape(-1, H)
    state = state.unsqueeze(dim=1).expand(B, self.candidates, Z).reshape(-1, Z)
    # Factorized belief over action sequences q(a_t:t+H) ~ N(0, I); instead of a
    # zeros/ones initialization, the first iteration samples from the actor pool.
    action_mean, action_std_dev = None, None
    for iteration in range(self.optimisation_iters):
        # Evaluate J action sequences from the current belief
        # (over the entire sequence at once, batched over particles)
        if iteration == 0:
            # Seed the candidates from the actor pool, an equal share per actor
            sub_action_list = []
            for idx in range(len(self.actor_pool)):
                action = (actions_l_mean_lists[idx] +
                          actions_l_std_lists[idx] * torch.randn(
                              self.top_planning_horizon, B,
                              self.candidates // len(self.actor_pool),
                              self.action_size, device=belief.device)
                          ).view(self.top_planning_horizon,
                                 B * self.candidates // len(self.actor_pool),
                                 self.action_size)  # time x (batch x candidates) x actions
                sub_action_list.append(action)
            actions = torch.cat(sub_action_list, dim=1)
        else:
            actions = (action_mean + action_std_dev * torch.randn(
                self.top_planning_horizon, B, self.candidates,
                self.action_size, device=belief.device)
                       ).view(self.top_planning_horizon, B * self.candidates,
                              self.action_size)  # time x (batch x candidates) x actions
        # Sample next states
        # beliefs: [12, 1000, 200], states: [12, 1000, 30] — 12 horizon steps, 1000 candidates
        beliefs, states, _, _ = self.upper_transition_model(state, actions, belief)
        # Calculate expected returns (technically sum of rewards over planning horizon)
        returns = self.reward_model(beliefs.view(-1, H), states.view(-1, Z)).view(
            self.top_planning_horizon, -1).sum(dim=0)  # [12000] -> [12, 1000] -> [1000]
        # Re-fit belief to the K best action sequences
        _, topk = returns.reshape(B, self.candidates).topk(
            self.top_candidates, dim=1, largest=True, sorted=False)
        topk += self.candidates * torch.arange(
            0, B, dtype=torch.int64,
            device=topk.device).unsqueeze(dim=1)  # fix indices for unrolled actions
        best_actions = actions[:, topk.view(-1)].reshape(
            self.top_planning_horizon, B, self.top_candidates, self.action_size)
        # Update belief with new means and standard deviations
        action_mean = best_actions.mean(dim=2, keepdim=True)
        action_std_dev = best_actions.std(dim=2, unbiased=False, keepdim=True)
    # Return a tanh-squashed sample (or mode) of the first action of the refitted plan
    dist = Normal(action_mean[0].squeeze(dim=1), action_std_dev[0].squeeze(dim=1))
    dist = TransformedDistribution(dist, TanhBijector())
    dist = torch.distributions.Independent(dist, 1)
    dist = SampleDist(dist)
    return dist.mode() if det else dist.rsample()
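# A minimal, self-contained sketch of the CEM refit loop above, with a toy
# quadratic reward standing in for the learned transition/reward models and
# the batch dimension B omitted. Horizon, candidate counts, and iteration
# count are illustrative assumptions, not this repo's configuration.
import torch

def cem_plan(reward_fn, horizon=12, action_size=6, candidates=1000,
             top_candidates=100, iters=10, device='cpu'):
    # q(a_t:t+H) ~ N(0, I): one Gaussian per (time step, action dimension)
    mean = torch.zeros(horizon, 1, action_size, device=device)
    std = torch.ones(horizon, 1, action_size, device=device)
    for _ in range(iters):
        # Sample J candidate action sequences from the current belief
        actions = mean + std * torch.randn(horizon, candidates, action_size, device=device)
        returns = reward_fn(actions)                             # (candidates,)
        # Re-fit the belief to the K best action sequences
        topk = returns.topk(top_candidates, largest=True, sorted=False).indices
        best = actions[:, topk]                                  # (horizon, K, action_size)
        mean = best.mean(dim=1, keepdim=True)
        std = best.std(dim=1, unbiased=False, keepdim=True)
    return mean[0].squeeze(0)  # first action of the refitted plan

# Toy reward: prefer sequences whose actions stay close to a fixed target.
target = torch.full((6,), 0.5)
first_action = cem_plan(lambda a: -((a - target) ** 2).sum(dim=(0, 2)))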