Example #1
import gym
import torch
from torch.distributions import Categorical


def test_model(model_file: str):
    # ActorCriticNet is the project's actor-critic model (4 observation
    # dimensions, 2 actions for CartPole); a sketch follows this example.
    net = ActorCriticNet(4, 2)
    net.load_state_dict(torch.load(model_file))
    net.eval()

    env = gym.make("CartPole-v1")
    env = gym.wrappers.Monitor(env,
                               f"./cart",
                               video_callable=lambda episode_id: True,
                               force=True)

    observation = env.reset()

    R = 0
    while True:
        env.render()
        # Add a batch dimension and sample an action from the policy logits.
        cleaned_observation = torch.tensor(observation,
                                           dtype=torch.float32).unsqueeze(dim=0)
        action_logits = net.forward_actor(cleaned_observation)
        action = Categorical(logits=action_logits).sample()
        observation, r, done, _ = env.step(action.item())
        R += r
        if done:
            break

    env.close()

    print(R)
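Example #1 (and the proc_net used later in Example #5) assumes an ActorCriticNet class with forward_actor and forward_critic methods that is not shown here. A minimal sketch of a model with that interface; the layer sizes and attribute names below are assumptions, not the original project's definition:

import torch
import torch.nn as nn


class ActorCriticNet(nn.Module):
    """Hypothetical stand-in with the interface the examples call."""

    def __init__(self, obs_dim: int, n_actions: int, hidden: int = 64):
        super().__init__()
        self.body = nn.Sequential(nn.Linear(obs_dim, hidden), nn.ReLU())
        self.actor_head = nn.Linear(hidden, n_actions)  # action logits
        self.critic_head = nn.Linear(hidden, 1)         # state value

    def forward_actor(self, obs: torch.Tensor) -> torch.Tensor:
        return self.actor_head(self.body(obs))

    def forward_critic(self, obs: torch.Tensor) -> torch.Tensor:
        return self.critic_head(self.body(obs))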
Example #2
    def act(self, obs):
        A, b, c0, curr_sol, (A_cuts, b_cuts) = obs
        A = torch.from_numpy(A).float()
        b = torch.from_numpy(b).unsqueeze(-1).float()
        A_cuts = torch.from_numpy(A_cuts).float()
        b_cuts = torch.from_numpy(b_cuts).unsqueeze(-1).float()

        Ab = torch.cat([A, b], dim=1)
        cut_ab = torch.cat([A_cuts, b_cuts], dim=1)
        all_ob = torch.cat([Ab, cut_ab], dim=0)

        if self.normalize:
            all_ob = (all_ob - all_ob.mean()) / (all_ob.max() - all_ob.min() +
                                                 1e-8)

        constraints = all_ob[:A.shape[0], :]
        cuts = all_ob[A.shape[0]:, :]
        constraints_embed = self.mlp_embed(constraints)
        cuts_embed = self.mlp_embed(cuts)

        # Score each candidate cut by its mean similarity to the constraint
        # embeddings, then sample a cut index from the resulting distribution.
        att_map = cuts_embed.matmul(constraints_embed.T)
        score = att_map.mean(dim=1)
        score -= score.max()  # shift scores for numerical stability
        probs = F.softmax(score, dim=0)
        action = Categorical(probs).sample()
        return action.item()
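Side note on Example #2: the max-subtraction is the usual stabilising shift before a softmax, and Categorical applies the same kind of normalisation internally when it is constructed from logits, so the scoring tail could equivalently be written as in this small standalone sketch (score here is a made-up stand-in for the per-cut scores):

import torch
from torch.distributions import Categorical

score = torch.randn(5)  # e.g. one raw score per candidate cut
# Equivalent to the explicit max-subtraction + softmax above: Categorical
# normalises the raw scores internally when they are passed as logits.
action = Categorical(logits=score).sample()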
Example #3
    def get_action(self, inv_obs):
        with torch.no_grad():
            device = self.pi.weight.device
            # Add a batch dimension and move the observation to the model's device.
            obs = torch.from_numpy(inv_obs).to(device).float()[None, ...]
            pi = F.softmax(self.pi(self.mlp(obs)), dim=-1)
            action = Categorical(probs=pi).sample()
        return action.item()
Example #4
    def get_action(self, pov_obs):
        with torch.no_grad():
            device = self.conv.conv.conv[0].weight.device
            obs = torch.from_numpy(pov_obs).to(device).float()[None, ...]
            obs = obs.mul_(1. / 255)  # scale uint8 pixels to [0, 1]
            pi = F.softmax(self.pi(self.conv(obs)), dim=-1)
            action = Categorical(probs=pi).sample()
        return action.item()
Example #5
    def play_episode(self):
        episode_actions = torch.empty(size=(0, ), dtype=torch.long)
        # Logits and observations are floating point, so use float buffers here.
        episode_logits = torch.empty(size=(0, self.env.action_space.n),
                                     dtype=torch.float32)
        episode_observs = torch.empty(size=(0,
                                            *self.env.observation_space.shape),
                                      dtype=torch.float32)
        episode_rewards = np.empty(shape=(0, ), dtype=np.float64)

        observation = self.env.reset()

        t = 0
        done = False
        while not done:
            # Prepare observation
            cleaned_observation = torch.tensor(observation,
                                               dtype=torch.float32).unsqueeze(dim=0)
            episode_observs = torch.cat((episode_observs, cleaned_observation),
                                        dim=0)

            # Get action from policy net
            action_logits = self.proc_net.forward_actor(cleaned_observation)
            action = Categorical(logits=action_logits).sample()

            # Save observation and the action from the net
            episode_logits = torch.cat((episode_logits, action_logits), dim=0)
            episode_actions = torch.cat((episode_actions, action), dim=0)

            # Get new observation and reward from action
            observation, r, done, _ = self.env.step(action.item())

            # Save reward from net_action
            episode_rewards = np.concatenate(
                (episode_rewards, np.asarray([r])), axis=0)

            t += 1

        discounted_R = self.get_discounted_rewards(episode_rewards, GAMMA)
        discounted_R -= episode_rewards.mean()

        mask = F.one_hot(episode_actions, num_classes=self.env.action_space.n)
        episode_log_probs = torch.sum(mask.float() *
                                      F.log_softmax(episode_logits, dim=1),
                                      dim=1)

        # Flatten the critic output to shape (T,) so the advantage is elementwise.
        values = self.proc_net.forward_critic(episode_observs).reshape(-1)
        action_advantage = (discounted_R.float() - values).detach()
        episode_weighted_log_probs = episode_log_probs * action_advantage
        sum_weighted_log_probs = torch.sum(
            episode_weighted_log_probs).unsqueeze(dim=0)
        sum_action_advantages = torch.sum(action_advantage).unsqueeze(dim=0)

        return (
            sum_weighted_log_probs,
            sum_action_advantages,
            episode_logits,
            np.sum(episode_rewards),
            t,
        )
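Example #5 calls a self.get_discounted_rewards helper that is not shown. A minimal sketch of the discounted return-to-go computation such a helper typically performs; only the name and signature come from the call above, the body is an assumption:

import numpy as np
import torch


def get_discounted_rewards(rewards: np.ndarray, gamma: float) -> torch.Tensor:
    """Compute returns-to-go G_t = r_t + gamma * G_{t+1}, back to front."""
    returns = np.zeros_like(rewards, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return torch.from_numpy(returns)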
Example #6
    def generate(self, inputs, hidden, generated_seq_len, id_2_word):
        # TODO ========================
        # Compute the forward pass, as in the self.forward method (above).
        # You'll probably want to copy substantial portions of that code here.
        #
        # We "seed" the generation by providing the first inputs
        # Subsequent inputs are generated by sampling from the output distribution,
        # as described in the tex (Problem 5.3)
        # Unlike for self.forward, you WILL need to apply the softmax activation
        # function here in order to compute the parameters of the categorical
        # distributions to be sampled from at each time-step.
        """
    Arguments:
        - input: A mini-batch of input tokens (NOT sequences!)
                        shape: (batch_size)
        - hidden: The initial hidden states for every layer of the stacked RNN.
                        shape: (num_layers, batch_size, hidden_size)
        - generated_seq_len: The length of the sequence to generate.
                       Note that this can be different than the length used
                       for training (self.seq_len)
    Returns:
        - Sampled sequences of tokens
                    shape: (generated_seq_len, batch_size)
    """
        samples = torch.zeros([generated_seq_len, self.batch_size],
                              dtype=torch.long,
                              device=hidden.device)
        samples[0] = inputs

        outp = self.embedding(
            inputs)  # shape: (self.batch_size, self.emb_size)
        for i in range(1, generated_seq_len):
            for j in range(self.num_layers):
                inp = self.inp_dp(outp) if j == 0 else outp
                hid = hidden[j]
                outp, hidden[j] = self.model[j](inp=inp.clone(),
                                                hidden=hid.clone())

            outp = self.Wy(outp)
            dist = F.softmax(outp,
                             dim=1)  # shape (self.batch_size, self.vocab_size)
            for k in range(dist.size(0)):
                while True:
                    s = Categorical(dist[k]).sample()
                    if id_2_word[s.item()] != '<eos>':
                        break
                samples[i, k] = s
            outp = self.embedding(samples[i])

        return samples
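A note on the sampling loop in Example #6: Categorical accepts batched probabilities, so a (batch_size, vocab_size) tensor yields one sample per row in a single call, and the explicit loop over k is only needed because of the '<eos>' rejection step. For reference, a self-contained sketch with made-up sizes:

import torch
from torch.distributions import Categorical

dist = torch.softmax(torch.randn(8, 100), dim=1)  # (batch_size, vocab_size)
tokens = Categorical(probs=dist).sample()         # one token per row, shape (8,)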
Example #7
    def episode(self, train=True, render=False, z=None, return_states=False):
        """Run one episode.

        Parameters
        ----------
        train : bool
            If True, perform update on underlying parameters and store reward
            into self.rewards.
        render : bool
            If True, display the episode with env.render and return total
            reward.
        z : torch.Tensor
            Skill value. If None, a random skill is sampled from self.prior.
        return_states : bool
            If True, return the list of states of the episode.
        """
        s = self.env.reset()
        if z is None:
            z = self.prior.sample()
        p_z = self.prior.log_prob(z)
        done, step, total_reward = False, 0, 0
        if return_states:
            states = [s]
        while not done:
            pi = self.actor(s, z)  # log P(a | s, z)
            a = Categorical(torch.exp(pi)).sample()  # Sample action
            new_s, _, done, _ = self.env.step(a.item())
            q = self.discriminator(s)  # log P(z | s)
            reward = q[:, z.argmax(dim=1)] - self.alpha * pi[:, a] - p_z
            if train:  # Perform update
                self._update_models(pi, a, q, reward, s, z, new_s, done)
            total_reward += reward.item()
            if render:  # Render the environment
                self.env.render()
            step += 1
            s = new_s
            if return_states:
                states.append(s)
        if train:  # Store episode score
            self.n_episode += 1
            self.rewards.append(total_reward / step)
        if render:  # Return episode score
            return total_reward
        if return_states:
            return states
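Example #7 leaves self.prior, self.actor, and self.discriminator to the surrounding class. Given that z is scored with prior.log_prob(z) and indexed with z.argmax(dim=1), a uniform one-hot skill prior fits the usage; a minimal sketch under that assumption (n_skills and the batch size of 1 are made up for illustration):

import torch
from torch.distributions import OneHotCategorical

n_skills = 10  # assumed number of skills
prior = OneHotCategorical(probs=torch.full((1, n_skills), 1.0 / n_skills))

z = prior.sample()       # one-hot skill vector, shape (1, n_skills)
p_z = prior.log_prob(z)  # log-probability of the sampled skill, shape (1,)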
Example #8
    def episode(self, train=True, render=False, return_states=False):
        """
        Run one episode.

        Parameters
        ----------
        train : bool
            If True, perform update on underlying parameters and store reward
            into self.rewards.
        render : bool
            If True, display the episode with env.render and return total
            reward.
        return_states : bool
            If True, return the list of states of the episode.
        """
        s = self.env.reset()
        done, step, total_reward = False, 0, 0
        if return_states:
            states = [s]
        while not done:
            pi = self.actor(s)  # log P(a | s)
            a = Categorical(torch.exp(pi)).sample()  # Sample action
            new_s, reward, done, _ = self.env.step(a.item())
            # Reward shaping: add a bonus based on how far the first state
            # component has moved away from -0.5.
            reward = torch.Tensor([[reward + (np.abs(new_s[0] + 0.5))]])
            if train:  # Perform update
                self._update_models(pi, a, reward, s, new_s, done)
            total_reward += reward.item()
            if render:  # Render the environment
                self.env.render()
            step += 1
            s = new_s
            if return_states:
                states.append(s)
        if train:  # Store episode score
            self.n_episode += 1
            self.rewards.append(total_reward)
        if render:  # Return episode score
            return total_reward
        if return_states:
            return states