Example #1
  def play_episode(self, episode: int):
    state = self.env.reset()
    previous_x = None
    episode_actions = torch.empty(size=(0,), dtype=torch.long, device=self.device)
    episode_logits = torch.empty(size=(0, self.env.action_space.n), device=self.device)
    average_rewards = numpy.empty(shape=(0,), dtype=numpy.float64)
    episode_rewards = numpy.empty(shape=(0,), dtype=numpy.float64)

    while True:
      #if not self.render:
      #  self.env.render()
      current_x = self.PreProcessing(state)
      x = current_x - previous_x if previous_x is not None else numpy.zeros_like(current_x)
      previous_x = current_x
      action_logits = self.agent(torch.tensor(x).float().unsqueeze(dim=0).to(self.device))
      episode_logits = torch.cat((episode_logits, action_logits), dim=0)
      action = Categorical(logits=action_logits).sample()
      episode_actions = torch.cat((episode_actions, action), dim=0)

      state, reward, done, _ = self.env.step(action=action.cpu().item())
      episode_rewards = numpy.concatenate((episode_rewards, numpy.array([reward])), axis=0)
      average_rewards = numpy.concatenate((average_rewards, numpy.expand_dims(numpy.mean(episode_rewards), axis=0)), axis=0)

      if done:
        episode += 1
        discounted_rewards = PG_RL.get_discounted_rewards(rewards=episode_rewards, gamma=self.gamma)
        discounted_rewards -= average_rewards
        discounted_rewards /= numpy.std(discounted_rewards)
        sum_of_rewards = numpy.sum(episode_rewards)
        mask = one_hot(episode_actions, num_classes=self.env.action_space.n)
        episode_log_probs = torch.sum(mask.float() * log_softmax(episode_logits, dim=1), dim=1)
        episode_weighted_log_probs = episode_log_probs * torch.tensor(discounted_rewards).float().to(self.device)
        sum_weighted_log_probs = torch.sum(episode_weighted_log_probs).unsqueeze(dim=0)
        #show_video()
        return sum_weighted_log_probs, episode_logits, sum_of_rewards, episode
Example #2
 def agent_step(self):
     with torch.no_grad():
         action_probs, log_probs, termination_probs, q_u, q_omega = self.policy(
             self.state)
         if self.current_option is None:
             self.current_option = Categorical(
                 probs=self._epsilon_probs(q_omega[0])).sample()
         action = Categorical(
             probs=action_probs[0, self.current_option, :]).sample()
         action = action.cpu().detach().numpy()
         # action = self.env.action_space.sample()
         action = int(action)
     next_state, reward, done, info = self.env.step(action)
     # self.env.render()
     self.replay_buffer.add([
         self.state, action, self.current_option, self.previous_option,
         reward, next_state, done
     ])
     self.state = next_state
     self.previous_option = self.current_option
     if done:
         self.agent_reset()
     elif termination_probs[0, self.current_option] >= torch.rand(1):
         self.current_option = None
     return reward, done
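
Example #2 calls a self._epsilon_probs helper that is not shown. A minimal standalone sketch of an epsilon-greedy distribution over the option-value vector q_omega (the function name and the epsilon parameter are assumptions, not part of the original code):

import torch

def epsilon_option_probs(q_omega: torch.Tensor, epsilon: float) -> torch.Tensor:
    """Epsilon-greedy probabilities over options from per-option Q-values."""
    num_options = q_omega.shape[0]
    probs = torch.full_like(q_omega, epsilon / num_options)  # uniform exploration mass
    probs[q_omega.argmax()] += 1.0 - epsilon                 # remaining mass on the greedy option
    return probs

# usage sketch: option = Categorical(probs=epsilon_option_probs(q_omega[0], epsilon=0.1)).sample()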
Example #3
    def evaluate(self, true_labels, all_preds, entropies, **kwargs):
        ood_entropies = np.zeros(0)
        accuracies = []

        with torch.no_grad():
            for batch_num, batch in enumerate(self.ds_loader):
                x, y = batch
                x = x.to(self.device)

                if not self.ensemble:
                    out = self.model(x)
                else:
                    out = 0
                    for model in self.ensemble:
                        out += model(x)
                    out /= len(self.ensemble)
                probs = F.softmax(out, dim=-1)
                preds, _ = torch.max(probs, dim=-1)

                # entropy
                entropy = Categorical(probs).entropy().squeeze()
                entropies = np.concatenate(
                    (entropies, entropy.detach().cpu().numpy()))
                ood_entropies = np.concatenate(
                    (ood_entropies, entropy.cpu().numpy()))

                # accuracy
                predictions = out.argmax(dim=-1, keepdim=True).view_as(y).cpu()
                correct = y.eq(predictions).sum().item()
                acc = correct / out.shape[0]

                accuracies.append(acc)

                true_labels = np.concatenate((true_labels, np.zeros(len(x))))
                all_preds = np.concatenate((all_preds, preds.cpu().reshape(
                    (-1))))

        auroc = calculate_auroc(true_labels, all_preds)
        aupr = calculate_aupr(true_labels, all_preds)

        auroc_entropy = calculate_auroc(1 - true_labels, entropies)
        aupr_entropy = calculate_aupr(1 - true_labels, entropies)

        auroc_name = f'auroc_{self.ds_dataset}'
        aupr_name = f'aupr_{self.ds_dataset}'
        auroc_ent_name = f'auroc_entropy_{self.ds_dataset}'
        aupr_ent_name = f'aupr_entropy_{self.ds_dataset}'
        entropy_name = f'entropy_{self.ds_dataset}'
        acc_name = f"acc_{self.ds_dataset}"

        return {
            acc_name: np.mean(accuracies),
            auroc_name: auroc,
            aupr_name: aupr,
            entropy_name: np.mean(ood_entropies),
            auroc_ent_name: auroc_entropy,
            aupr_ent_name: aupr_entropy
        }
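
Examples #3 and #7 call calculate_auroc and calculate_aupr, which are not defined in this listing. If they simply wrap scikit-learn's ranking metrics (an assumption), they could look like this:

import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

def calculate_auroc(true_labels: np.ndarray, scores: np.ndarray) -> float:
    """Area under the ROC curve for binary labels and continuous scores."""
    return roc_auc_score(true_labels, scores)

def calculate_aupr(true_labels: np.ndarray, scores: np.ndarray) -> float:
    """Area under the precision-recall curve."""
    return average_precision_score(true_labels, scores)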
Example #4
    def play_ep(self):
        # reset env state after every episode
        state = self.env.reset() 
        prev_x = None
        episode_actions = torch.empty(size=(0,), dtype=torch.long, device=self.device)
        episode_logits = torch.empty(size=(0, 2), device=self.device)
        average_rewards = np.empty(shape=(0,), dtype=np.float64)
        episode_rewards = np.empty(shape=(0,), dtype=np.float64)
    
        while True:
            # render env for display 
            if self.render_env:
                self.env.render()

            # preprocess the current state and subtract the previous state to add in motion information
            cur_x = prepro(state)    
            x = cur_x - prev_x if prev_x is not None else np.zeros(self.in_sz).astype(np.float32)
            prev_x = cur_x

            # get choice from network
            action_logit = self.agent(torch.tensor(x).float().unsqueeze(0).to(self.device))
            # add to buffer
            episode_logits = torch.cat((episode_logits, action_logit), dim=0)
            # sample an action and execute it
            action = Categorical(logits=action_logit).sample()
            # add to buffer
            episode_actions = torch.cat((episode_actions, action),dim=0)

            state, reward, done, _ = self.env.step(action=action.cpu().item())

            # add to buffer 
            episode_rewards = np.concatenate((episode_rewards, np.array([reward])), axis=0)
            
            # running average of the rewards up to the current time step (state-specific baseline)
            average_rewards = np.concatenate((average_rewards, np.expand_dims(np.mean(episode_rewards), axis=0)), axis=0)

            if reward != 0: # Pong has either +1 or -1 reward exactly when game ends.
                print(('ep #: game finished, reward: %f' % (reward)) + ('' if reward == -1 else ' !!!!!!!!'))
                
            if done: # end of episode
                # get discounted rewards and normalize the return
                discounted_rewards = discount_rewards(episode_rewards, gamma=self.gamma)
                    
                # subtract baseline rewards 
                discounted_rewards -= average_rewards
                    
                # set mask for the actions executed 
                mask = one_hot(episode_actions, num_classes=2)
                
                # similar to cross-entropy for classification but with fake labels and our action confidence
                weighted_ps = torch.sum(mask.float() * log_softmax(episode_logits, dim=1), dim=1)
                    
                # weight the loss with the discounted rewards to get expected reward from distribution 
                episode_weighted_loss = weighted_ps * torch.tensor(discounted_rewards).float().to(self.device)
                
                return episode_weighted_loss, episode_logits, episode_rewards
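
Example #4 calls prepro and discount_rewards, neither of which is shown. discount_rewards computes the same rewards-to-go as the helper sketched after Example #11; prepro is presumably a Karpathy-style Pong preprocessor along these lines (the exact crop bounds and background pixel values are assumptions):

import numpy as np

def prepro(frame: np.ndarray) -> np.ndarray:
    """Crop a 210x160x3 Atari frame, downsample by 2, binarize, and flatten to float32."""
    frame = frame[35:195]              # crop to the playing field
    frame = frame[::2, ::2, 0].copy()  # downsample by a factor of 2, keep one color channel
    frame[frame == 144] = 0            # erase background (type 1)
    frame[frame == 109] = 0            # erase background (type 2)
    frame[frame != 0] = 1              # paddles and ball become 1
    return frame.astype(np.float32).ravel()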
Example #5
    def get_action(self, state_np):
        state_th = torch.tensor(state_np).float()
        action_th = self.forward(state_th)

        if self.type == 'discrete':
            action_sampled_th = Categorical(logits=action_th).sample()
        else:
            raise NotImplementedError

        action_sampled_np = action_sampled_th.cpu().detach().numpy()
        return action_sampled_np
Example #6
    def choose_action(self, states, buffer=True):
        probs, values = self.forward(states)
        # print("values:", values)
        # print("probs:", probs)
        actions = Categorical(probs).sample()

        if buffer:
            self.state_buffer.append(states)
            self.value_buffer.append(values)
            self.prob_buffer.append(probs)
            self.action_buffer.append(torch.unsqueeze(actions, 1))
        # print("actions:", actions)
        actions = actions.cpu().numpy() + 1
        values = values.detach().cpu().numpy()
        probs = probs.detach().cpu().numpy()
        return actions, values, probs
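
Example #6 passes probabilities positionally (Categorical(probs)), presumably because its forward pass already applies a softmax, while most of the other examples pass raw scores via Categorical(logits=...). The distinction matters: the first positional argument is interpreted as (unnormalized) probabilities, not logits. A small check:

import torch
from torch.distributions import Categorical

logits = torch.tensor([2.0, 0.0, -1.0])

# These two define the same distribution:
d1 = Categorical(logits=logits)
d2 = Categorical(probs=logits.softmax(dim=-1))
print(torch.allclose(d1.probs, d2.probs))  # True

# Categorical(x) treats x as probabilities, so it only matches the
# logits-based examples when x already comes out of a softmax.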
Example #7
    def evaluate(self, true_labels, all_preds, entropies, **kwargs):
        ood_entropies = np.zeros(0)

        with torch.no_grad():
            for batch_num, batch in enumerate(self.ood_loader):
                x, y = batch
                x = x.float().to(self.device)

                if not self.ensemble:
                    out = self.model(x)
                else:
                    out = 0
                    for model in self.ensemble:
                        out += model(x)
                    out /= len(self.ensemble)
                probs = F.softmax(out, dim=-1)
                preds, _ = torch.max(probs, dim=-1)

                entropy = Categorical(probs).entropy().squeeze()
                entropies = np.concatenate(
                    (entropies, entropy.detach().cpu().numpy()))
                ood_entropies = np.concatenate(
                    (ood_entropies, entropy.cpu().numpy()))

                true_labels = np.concatenate((true_labels, np.zeros(len(x))))
                all_preds = np.concatenate((all_preds, preds.cpu().reshape(
                    (-1))))

        auroc = calculate_auroc(true_labels, all_preds)
        aupr = calculate_aupr(true_labels, all_preds)

        auroc_entropy = calculate_auroc(1 - true_labels, entropies)
        aupr_entropy = calculate_aupr(1 - true_labels, entropies)

        auroc_name = f'auroc_{self.ood_dataset}'
        aupr_name = f'aupr_{self.ood_dataset}'
        auroc_ent_name = f'auroc_entropy_{self.ood_dataset}'
        aupr_ent_name = f'aupr_entropy_{self.ood_dataset}'
        entropy_name = f'entropy_{self.ood_dataset}'

        return {
            auroc_name: auroc,
            aupr_name: aupr,
            entropy_name: np.mean(ood_entropies),
            auroc_ent_name: auroc_entropy,
            aupr_ent_name: aupr_entropy
        }
Example #8
    def _step(self, obs, hiddens, masks):

        with torch.no_grad():
            values, action_probs, hiddens = self.model(obs, hiddens, masks)

        actions = Categorical(action_probs.detach()).sample()

        # Sample actions from the output distributions
        obs, rewards, dones, infos = self.envs.step(actions.cpu().numpy())
        obs = torch.from_numpy(obs)
        rewards = torch.from_numpy(rewards).unsqueeze(1)
        masks = torch.from_numpy(1 - (dones)).unsqueeze(1)
        actions = actions.unsqueeze(1)

        self.rollouts.insert(
            obs,  #next
            hiddens,  #next
            actions,  #now
            action_probs,  #now
            values,  #now
            rewards,  #now
            masks)  #next
Example #9
    def get_next_batch(self, env):

        for _ in range(C.NUM_EPOCHS):

            epoch_logits = torch.empty(size=(0, self.action_space_size),
                                       device=self.DEVICE)
            epoch_weighted_log_probs = torch.empty(size=(0, ),
                                                   dtype=torch.float,
                                                   device=self.DEVICE)
            total_rewards = deque([], maxlen=C.BATCH_SIZE_PER_THREAD)

            episode_counter = 0

            while episode_counter < C.BATCH_SIZE_PER_THREAD:

                episode_counter += 1

                # reset the environment to a random initial state every epoch
                state = env.reset()

                # initialize the episode arrays
                episode_actions = torch.empty(size=(0, ),
                                              dtype=torch.long,
                                              device=self.DEVICE)
                episode_logits = torch.empty(size=(0, C.action_space_size),
                                             device=self.DEVICE)
                average_rewards = np.empty(shape=(0, ), dtype=np.float64)
                episode_rewards = np.empty(shape=(0, ), dtype=np.float64)

                # episode loop
                for step_index in range(0, C.max_simulation_length):

                    # get the action logits from the agent - (preferences)
                    action_logits = self.m(
                        torch.tensor(state).float().unsqueeze(dim=0).to(
                            self.DEVICE))

                    # append the logits to the episode logits list
                    episode_logits = torch.cat((episode_logits, action_logits),
                                               dim=0)

                    # sample an action according to the action distribution
                    action = Categorical(logits=action_logits).sample()

                    # append the action to the episode action list to obtain the trajectory
                    # we need to store the actions and logits so we could calculate the gradient of the performance
                    episode_actions = torch.cat((episode_actions, action),
                                                dim=0)

                    # take the chosen action, observe the reward and the next state
                    state, reward, done, _ = env.step(
                        action=action.cpu().item())

                    # append the reward to the rewards pool that we collect during the episode
                    # we need the rewards so we can calculate the weights for the policy gradient
                    # and the baseline of average
                    episode_rewards = np.concatenate(
                        (episode_rewards, np.array([reward])), axis=0)

                    # here the average reward is state specific
                    average_rewards = np.concatenate(
                        (average_rewards,
                         np.expand_dims(np.mean(episode_rewards), axis=0)),
                        axis=0)

                # turn the rewards we accumulated during the episode into the rewards-to-go:
                # earlier actions are responsible for more rewards than the later taken actions
                discounted_rewards_to_go = utils.get_discounted_rewards(
                    rewards=episode_rewards, gamma=C.GAMMA)
                discounted_rewards_to_go -= average_rewards  # baseline - state specific average

                # calculate the sum of the rewards for the running average metric
                sum_of_rewards = np.sum(episode_rewards)

                # after each episode append the sum of total rewards to the deque
                total_rewards.append(sum_of_rewards)

                # set the mask for the actions taken in the episode
                mask = one_hot(episode_actions,
                               num_classes=C.action_space_size)

                # calculate the log-probabilities of the taken actions
                # mask is needed to filter out log-probabilities of not related logits
                episode_log_probs = torch.sum(
                    mask.float() * log_softmax(episode_logits, dim=1), dim=1)

                # weight the episode log-probabilities by the rewards-to-go
                episode_weighted_log_probs = episode_log_probs * \
                    torch.tensor(discounted_rewards_to_go).float().to(self.DEVICE)

                # calculate the sum over trajectory of the weighted log-probabilities
                sum_weighted_log_probs = torch.sum(
                    episode_weighted_log_probs).unsqueeze(dim=0)

                # append the weighted log-probabilities of actions
                epoch_weighted_log_probs = torch.cat(
                    (epoch_weighted_log_probs, sum_weighted_log_probs), dim=0)

                # append the logits - needed for the entropy bonus calculation
                epoch_logits = torch.cat((epoch_logits, episode_logits), dim=0)

                # calculate the loss
                loss, entropy = utils.calculate_loss(
                    C.BETA,
                    epoch_logits=epoch_logits,
                    weighted_log_probs=epoch_weighted_log_probs)

            yield loss, total_rewards
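
Example #9 calls utils.calculate_loss(C.BETA, epoch_logits=..., weighted_log_probs=...), which is not shown. A plausible sketch of a REINFORCE loss with an entropy bonus that matches this call signature (the exact form used by the original utils module is an assumption):

import torch
from torch.distributions import Categorical

def calculate_loss(beta: float,
                   epoch_logits: torch.Tensor,
                   weighted_log_probs: torch.Tensor):
    """Policy-gradient loss: negative mean of the reward-weighted log-probs,
    minus an entropy bonus (weighted by beta) that encourages exploration."""
    policy_loss = -torch.mean(weighted_log_probs)
    entropy = Categorical(logits=epoch_logits).entropy().mean()
    return policy_loss - beta * entropy, entropy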
Example #10
    def _update(self, states, actions, rewards, advantages, returns, masks,
                epoch):
        old_model = copy.deepcopy(self.model)

        policy_losses = np.array([])
        entropies = np.array([])
        value_losses = np.array([])
        losses = np.array([])

        for _ in range(self.ppo_epochs):
            rand_list = (torch.randperm(self.batch_num * self.batch_size).view(
                -1, self.batch_size).tolist())

            for ind in rand_list:
                batch = states[ind]
                actor_logits, vals, _ = self.model(batch)
                log_probs = F.log_softmax(actor_logits, dim=1)
                with torch.no_grad():
                    old_actor_logits, _, _ = old_model(batch)
                    old_log_probs = F.log_softmax(old_actor_logits, dim=1)

                adv = advantages[ind].to(self.device)
                advs = advantages.to(self.device)
                adv = (adv - advs.mean()) / (advs.std() + 1e-8)

                A = returns[ind].to(self.device) - vals

                action = actions[ind].to(self.device)

                old_log_probs = old_log_probs.gather(1, action)
                log_probs = log_probs.gather(1, action)

                r = (log_probs - old_log_probs).exp()

                clip = r.clamp(min=1 - self.epsilon, max=1 + self.epsilon)
                L, _ = torch.stack([r * adv.detach(),
                                    clip * adv.detach()]).min(0)
                v_l = A.pow(2).mean()
                L = L.mean()

                entropy = Categorical(F.softmax(actor_logits,
                                                dim=1)).entropy().mean()

                loss = -L + self.v_loss_coef * v_l - self.entropy_coef * entropy

                self.optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(),
                                         self.max_grad_norm)
                self.optimizer.step()

                policy_losses = np.append(policy_losses,
                                          L.cpu().detach().numpy())
                value_losses = np.append(value_losses,
                                         v_l.cpu().detach().numpy())
                losses = np.append(losses, loss.cpu().detach().numpy())
                entropies = np.append(entropies,
                                      entropy.cpu().detach().numpy())

        policy_loss = policy_losses.mean()
        value_loss = value_losses.mean()
        loss = losses.mean()
        entropy = entropies.mean()

        self.writer.add_scalar("PolicyLoss", policy_loss, epoch + 1)
        self.writer.add_scalar("ValueLoss", value_loss, epoch + 1)
        self.writer.add_scalar("Loss", loss, epoch + 1)
        self.writer.add_scalar("Entropy", entropy, epoch + 1)

        del states, actions, rewards, advantages, returns, masks
Example #11
def play_episode(environment, device, action_space_size, agent, gamma,
                 episode: int):
    """
            Plays an episode of the environment.
            episode: the episode counter
            Returns:
                sum_weighted_log_probs: the sum of the log-prob of an action multiplied by the reward-to-go from that state
                episode_logits: the logits of every step of the episode - needed to compute entropy for entropy bonus
                finished_rendering_this_epoch: pass-through rendering flag
                sum_of_rewards: sum of the rewards for the episode - needed for the average over 200 episode statistic
        """

    agent.to('cpu')
    device = 'cpu'

    # reset the environment to a random initial state every epoch
    state = environment.reset()

    # initialize the episode arrays
    episode_actions = torch.empty(size=(0, ), dtype=torch.long, device=device)
    episode_logits = torch.empty(size=(0, action_space_size), device=device)
    average_rewards = np.empty(shape=(0, ), dtype=np.float64)
    episode_rewards = np.empty(shape=(0, ), dtype=np.float64)

    # episode loop
    while True:

        # get the action logits from the agent - (preferences)
        action_logits = agent(
            torch.tensor(state).float().unsqueeze(dim=0).to(device))

        #print('action logits is',action_logits)

        # append the logits to the episode logits list
        episode_logits = torch.cat((episode_logits, action_logits), dim=0)

        # sample an action according to the action distribution
        action = Categorical(logits=action_logits).sample()

        #print('the action after categorical is',action)

        # append the action to the episode action list to obtain the trajectory
        # we need to store the actions and logits so we could calculate the gradient of the performance
        episode_actions = torch.cat((episode_actions, action), dim=0)

        # take the chosen action, observe the reward and the next state
        state, reward, done, _ = environment.step(action=action.cpu().item())

        # append the reward to the rewards pool that we collect during the episode
        # we need the rewards so we can calculate the weights for the policy gradient
        # and the baseline of average
        episode_rewards = np.concatenate((episode_rewards, np.array([reward])),
                                         axis=0)

        # here the average reward is state specific
        average_rewards = np.concatenate(
            (average_rewards, np.expand_dims(np.mean(episode_rewards),
                                             axis=0)),
            axis=0)

        # the episode is over
        if done:

            # increment the episode
            episode += 1

            # turn the rewards we accumulated during the episode into the rewards-to-go:
            # earlier actions are responsible for more rewards than the later taken actions
            discounted_rewards_to_go = utils.get_discounted_rewards(
                rewards=episode_rewards, gamma=gamma)
            discounted_rewards_to_go -= average_rewards  # baseline - state specific average

            # # calculate the sum of the rewards for the running average metric
            sum_of_rewards = np.sum(episode_rewards)

            # set the mask for the actions taken in the episode
            mask = one_hot(episode_actions,
                           num_classes=environment.action_space.n)

            # calculate the log-probabilities of the taken actions
            # mask is needed to filter out log-probabilities of not related logits
            episode_log_probs = torch.sum(mask.float() *
                                          log_softmax(episode_logits, dim=1),
                                          dim=1)

            # weight the episode log-probabilities by the rewards-to-go
            episode_weighted_log_probs = episode_log_probs * \
                torch.tensor(discounted_rewards_to_go).float().to(device)

            # calculate the sum over trajectory of the weighted log-probabilities
            sum_weighted_log_probs = torch.sum(
                episode_weighted_log_probs).unsqueeze(dim=0)

            # device was set to 'cpu' above, so a single move is sufficient
            sum_weighted_log_probs = sum_weighted_log_probs.to(device)
            episode_logits = episode_logits.to(device)

            return sum_weighted_log_probs, episode_logits, sum_of_rewards, episode
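
Examples #1, #9, and #11 all rely on a get_discounted_rewards helper. A minimal sketch under the conventional rewards-to-go definition G_t = r_t + gamma * G_{t+1} (the original implementation may differ):

import numpy as np

def get_discounted_rewards(rewards: np.ndarray, gamma: float) -> np.ndarray:
    """Discounted rewards-to-go, computed by a backward pass over the episode."""
    discounted = np.zeros_like(rewards, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        discounted[t] = running
    return discounted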
Example #12
    def inference(
        self,
        sent_memory_emb,
        graph_memory_emb,
        sent_memory_mask,
        graph_memory_mask,
        max_step,
        use_sampling=False,
    ):
        batch_size, sent_memory_seq, dim = list(sent_memory_emb.shape)
        _, graph_memory_seq, _ = list(graph_memory_emb.shape)

        sent_memory_mask_inv = sent_memory_mask == 0  # [batch, sent_memory_seq]
        graph_memory_mask_inv = graph_memory_mask == 0  # [batch, sent_memory_seq]

        target_ids = [[self.BOS
                       for i in range(batch_size)]]  # [target_seq, batch]
        target_mask = [[1.0] for i in range(batch_size)]  # [batch, target_seq]
        target_prob = []  # [target_seq, batch]
        is_finish = [False for _ in range(batch_size)]
        rows = torch.arange(batch_size).to(device)
        for step in range(max_step):
            cur_seq = step + 1
            cur_emb = self.dec_word_embedding(
                torch.tensor(target_ids).to(device))  # [cur_seq, batch, dim]
            cur_emb = self.position_encoder(cur_emb)  # [cur_seq, batch, dim]

            cur_mask = torch.tensor(target_mask).to(device)
            cur_mask_inv = cur_mask == 0.0  # [batch, cur_seq]
            cur_triu_mask = torch.triu(torch.ones(cur_seq, cur_seq).to(device),
                                       diagonal=1)  # [cur_seq, cur_seq]
            cur_triu_mask.masked_fill_(cur_triu_mask == 1, -1e20)

            cur_emb = self.decoder(
                cur_emb,
                sent_memory_emb,  # [batch, sent_len, dim]
                graph_memory_emb,  # [batch, graph_len, dim]
                tgt_mask=cur_triu_mask,
                tgt_key_padding_mask=cur_mask_inv,
                sent_memory_key_padding_mask=sent_memory_mask_inv,
                graph_memory_key_padding_mask=graph_memory_mask_inv,
            )  # [batch, cur_seq, dim]

            assert has_nan(cur_emb) is False

            # break after the first time when all items are finished
            if all(is_finish) or step == max_step - 1:
                cur_len = cur_mask.sum(dim=1).long()
                target_vec = universal_sentence_embedding(
                    cur_emb, cur_mask, cur_len)
                break

            # generating step outputs
            logits = self.projector(cur_emb[:, -1, :]).view(
                batch_size, self.word_vocab_size)  # [batch, vocab]
            if use_sampling is False:
                indices = logits.argmax(dim=1)  # [batch]
            else:
                indices = Categorical(logits=logits).sample()  # [batch]

            prob = F.softmax(logits, dim=1)[rows, indices]  # [batch]
            target_prob.append(prob)
            indices = indices.cpu().tolist()
            target_ids.append(indices)
            for i in range(batch_size):
                target_mask[i].append(
                    0.0 if is_finish[i] else
                    1.0)  # based on if is_finish in the last step

            for i in range(batch_size):
                is_finish[i] |= indices[i] == self.EOS

        target_ids = list(map(list,
                              zip(*target_ids[1:])))  # [batch, target_seq]
        target_mask = torch.tensor([x[1:] for x in target_mask
                                    ]).to(device)  # [batch, target_seq]
        target_prob = torch.stack(target_prob, dim=1)  # [batch, target_seq]
        return target_vec, target_ids, target_prob, target_mask
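
Example #12 calls universal_sentence_embedding(cur_emb, cur_mask, cur_len) without showing it. Judging by the name and arguments it is a masked pooling over the sequence dimension; a mean-pool sketch under that assumption:

import torch

def universal_sentence_embedding(emb: torch.Tensor,
                                 mask: torch.Tensor,
                                 lengths: torch.Tensor) -> torch.Tensor:
    """Masked mean over the sequence dimension.

    emb:     [batch, seq, dim] token embeddings
    mask:    [batch, seq] with 1.0 for valid positions and 0.0 for padding
    lengths: [batch] number of valid positions per example
    """
    summed = (emb * mask.unsqueeze(-1)).sum(dim=1)      # [batch, dim]
    return summed / lengths.clamp(min=1).unsqueeze(-1)  # avoid division by zero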
Example #13
    def predict_mstcn(self,
                      model_dir,
                      results_dir,
                      features_path,
                      vid_list_file,
                      epoch,
                      actions_dict,
                      device,
                      sample_rate,
                      bsn_result_path,
                      mstcn_use_lbp,
                      poolingLength=99):
        self.model.eval()
        inverse_dict = {v: k for k, v in actions_dict.items()}
        lbp = LocalBarrierPooling(poolingLength)
        lbp = lbp.to(device)

        with torch.no_grad():
            self.model.to(device)
            self.model.load_state_dict(
                torch.load(model_dir + "/epoch-" + str(epoch) + ".model"))
            file_ptr = open(vid_list_file, 'r')
            list_of_vids = file_ptr.read().split('\n')[:-1]
            file_ptr.close()
            for vid in list_of_vids:
                print(vid)
                features = np.load(features_path + vid.split('.')[0] + '.npy')
                features = features[:, ::sample_rate]
                if mstcn_use_lbp:
                    num_frames = np.shape(features)[1]
                    barrier_file = bsn_result_path + vid + ".csv"
                    barrier = np.array(pd.read_csv(barrier_file))
                    temporal_scale = np.shape(barrier)[0]
                    barrier = np.transpose(barrier)
                    barrier = torch.tensor(
                        barrier, dtype=torch.float)  #size=[num_frames]
                    if temporal_scale <= num_frames:
                        resize_barrier = F.interpolate(barrier,
                                                       size=num_frames,
                                                       mode='nearest')
                    else:
                        resize_barrier = barrier
                    resize_barrier = resize_barrier.unsqueeze(0)
                    resize_barrier = resize_barrier.unsqueeze(
                        0)  # size=[1,1,num_frames]
                    resize_barrier = resize_barrier.to(device)

                input_x = torch.tensor(features, dtype=torch.float)
                input_x.unsqueeze_(0)
                input_x = input_x.to(device)
                predictions = self.model(
                    input_x, torch.ones(input_x.size(), device=device))
                predictions = predictions[-1]
                if mstcn_use_lbp:
                    if temporal_scale <= num_frames:
                        predictions = lbp(predictions, resize_barrier)
                    else:
                        predictions = F.interpolate(predictions,
                                                    size=temporal_scale,
                                                    mode='linear',
                                                    align_corners=False)
                        predictions = lbp(predictions, resize_barrier)
                        predictions = F.interpolate(predictions,
                                                    size=num_frames,
                                                    mode='linear',
                                                    align_corners=False)
                predictions = F.softmax(predictions, dim=1)
                entropy = Categorical(
                    probs=predictions.squeeze(0).transpose(1, 0)).entropy()
                entropy = entropy.cpu().numpy().astype(str)

                f_name = vid.split('/')[-1].split('.')[0]
                f_ptr = open(results_dir + "/entropy_" + f_name, "w")
                f_ptr.write(' '.join(entropy))
                f_ptr.close()

                _, predicted = torch.max(predictions.data, 1)
                predicted = predicted.squeeze()
                recognition = []
                for i in range(len(predicted)):
                    recognition = np.concatenate(
                        (recognition,
                         [inverse_dict[predicted[i].item()]] * sample_rate))
                f_name = vid.split('/')[-1].split('.')[0]
                f_ptr = open(results_dir + "/" + f_name, "w")
                f_ptr.write("### Frame level recognition: ###\n")
                f_ptr.write(' '.join(recognition))
                f_ptr.close()
Example #14
    def evaluate(self, **kwargs):
        true_labels = np.zeros(0)
        all_preds = np.zeros(0)
        all_correct = np.zeros(0)
        conf_true_labels = np.zeros(0)
        brier_scores = []
        entropies = np.zeros(0)
        acc = []
        nll = []

        with torch.no_grad():
            for batch_num, batch in enumerate(self.test_loader):
                x, y = batch
                x = x.to(self.device)

                if not self.ensemble:
                    out = self.model(x)
                else:
                    out = 0
                    for model in self.ensemble:
                        out += model(x)
                    out /= len(self.ensemble)
                # Logits to probability distribution
                probs = F.softmax(out, dim=-1)
                # Maximum softmax probability
                preds, indices = torch.max(probs, dim=-1)
                # Label predictions
                label_preds = probs.argmax(dim=-1, keepdim=True).view_as(y)
                # Compute accuracy
                corrects = y.eq(label_preds.cpu())
                correct = corrects.sum().item()
                acc.append(correct / out.shape[0])

                all_correct = np.concatenate(
                    (all_correct, corrects.cpu().numpy()))

                # Compute entropy
                entropy = Categorical(probs).entropy().squeeze()
                entropies = np.concatenate((entropies, entropy.cpu().numpy()))

                # Compute brier score
                brier_scores.append(calculate_brier_score(probs, y))

                # Compute NLL
                nll.append(-np.mean(np.log(preds.cpu().numpy())))

                true_labels = np.concatenate((true_labels, np.ones(len(x))))
                all_preds = np.concatenate((all_preds, preds.cpu().reshape(
                    (-1))))
                conf_true_labels = np.concatenate(
                    (conf_true_labels, torch.isclose(
                        y.cpu(),
                        indices.cpu()).numpy().astype(float).reshape(-1)))

        conf_auroc = calculate_auroc(conf_true_labels, all_preds)
        conf_aupr = calculate_aupr(conf_true_labels, all_preds)
        brier_score = np.mean(np.array(brier_scores))
        ece = calculate_ece(all_preds, all_correct)

        return {
            'conf_auroc': conf_auroc,
            'conf_aupr': conf_aupr,
            'brier_score': brier_score,
            'entropy': np.mean(entropies),
            'test_acc': np.mean(acc),
            'nll': np.mean(nll),
            'ece': ece,
        }, true_labels, all_preds, entropies
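
Example #14 uses calculate_brier_score and calculate_ece, neither of which appears in this listing. Sketches under the standard definitions (multi-class Brier score against a one-hot target; expected calibration error with equal-width confidence bins); the original helpers may use a different binning scheme:

import numpy as np
import torch
import torch.nn.functional as F

def calculate_brier_score(probs: torch.Tensor, targets: torch.Tensor) -> float:
    """Mean squared error between the predicted distribution and the one-hot target."""
    one_hot = F.one_hot(targets.long(), num_classes=probs.shape[-1]).float()
    return (probs.cpu() - one_hot.cpu()).pow(2).sum(dim=-1).mean().item()

def calculate_ece(confidences: np.ndarray, correct: np.ndarray, n_bins: int = 15) -> float:
    """Expected calibration error: confidence-vs-accuracy gap, weighted by bin size."""
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for lo, hi in zip(bins[:-1], bins[1:]):
        in_bin = (confidences > lo) & (confidences <= hi)
        if in_bin.any():
            ece += in_bin.mean() * abs(correct[in_bin].mean() - confidences[in_bin].mean())
    return float(ece)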
Example #15
 def act(self, x):
     with torch.no_grad():
         logits = self(x)
         m = Categorical(logits=logits).sample().squeeze()
     return m.cpu().item()
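
Example #15's act method is easy to exercise end to end. A minimal, self-contained policy using the same pattern (the two-layer MLP and its sizes are arbitrary choices for illustration):

import torch
import torch.nn as nn
from torch.distributions import Categorical

class TinyPolicy(nn.Module):
    def __init__(self, obs_dim: int = 4, n_actions: int = 2):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(obs_dim, 32), nn.Tanh(), nn.Linear(32, n_actions))

    def forward(self, x):
        return self.net(x)

    def act(self, x):
        with torch.no_grad():
            logits = self(x)
            m = Categorical(logits=logits).sample().squeeze()
        return m.cpu().item()

policy = TinyPolicy()
obs = torch.randn(1, 4)  # a single observation with a batch dimension
print(policy.act(obs))   # prints 0 or 1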
Example #16
def main():

    # make the environments
    if args.num_envs == 1:
        env = [gym.make(args.env_name)]
    else:
        env = [gym.make(args.env_name) for i in range(args.num_envs)]

    env = MultiGym(env, render=args.render)

    n_states = env.observation_space.shape
    n_actions = env.action_space.n
    print('state shape:', n_states, 'actions:', n_actions)

    policy = ConvPolicy(n_actions).to(device)
    optimizer = optim.RMSprop(policy.parameters(), lr=args.lr)

    if args.algo == 'ppo':
        sys.path.append('../')
        from algorithms.ppo import PPO
        update_algo = PPO(policy=policy,
                          optimizer=optimizer,
                          num_steps=args.num_steps,
                          num_envs=args.num_envs,
                          state_size=(4, 105, 80),
                          entropy_coef=args.entropy,
                          gamma=args.gamma,
                          device=device,
                          epochs=args.ppo_epochs)
    else:
        sys.path.append('../')
        from algorithms.a2c import A2C
        update_algo = A2C(policy=policy,
                          optimizer=optimizer,
                          num_steps=args.num_steps,
                          num_envs=args.num_envs,
                          state_size=(4, 105, 80),
                          entropy_coef=args.entropy,
                          gamma=args.gamma,
                          device=device)

    end_rewards = []

    try:
        print('starting episodes')
        idx = 0
        d = False
        reward_sum = np.zeros((args.num_envs))
        restart = True
        frame = env.reset()
        mask = torch.ones(args.num_envs)
        all_start = time.time()

        for update_idx in range(args.num_updates):
            update_algo.policy.train()

            # stack the frames
            s = train_state_proc.proc_state(frame, mask=mask)

            # insert state before getting actions
            update_algo.states[0].copy_(s)

            start = time.time()
            for step in range(args.num_steps):

                with torch.no_grad():
                    # get probability dist and values
                    p, v = update_algo.policy(update_algo.states[step])
                    a = Categorical(p).sample()

                # take action get response
                frame, r, d = env.step(
                    a.cpu().numpy() if args.num_envs > 1 else [a.item()])
                s = train_state_proc.proc_state(frame, mask)

                update_algo.insert_experience(step=step,
                                              s=s,
                                              a=a,
                                              v=v,
                                              r=r,
                                              d=d)

                mask = torch.tensor(1. - d).float()
                reward_sum = (reward_sum + r)

                # if any episode finished append episode reward to list
                if d.any():
                    end_rewards.extend(reward_sum[d])

                # reset any rewards that finished
                reward_sum = reward_sum * mask.numpy()

                idx += 1

            with torch.no_grad():
                _, next_val = update_algo.policy(update_algo.states[-1])

            update_algo.update(next_val.view(1, args.num_envs).to(device),
                               next_mask=mask.to(device))

            if args.lr_decay:
                for params in update_algo.optimizer.param_groups:
                    params['lr'] = (
                        lr_min + 0.5 * (args.lr - lr_min) *
                        (1 + np.cos(np.pi * idx / args.num_updates)))

            # update every so often by displaying results in term
            if (update_idx % args.log_interval
                    == 0) and (len(end_rewards) > 0):
                total_steps = (idx + 1) * args.num_envs * args.num_steps
                end = time.time()
                print(end_rewards[-10:])
                print('Updates {}\t  Time: {:.4f} \t FPS: {}'.format(
                    update_idx, end - start,
                    int(total_steps / (end - all_start))))
                print(
                    'Mean Episode Rewards: {:.2f} \t Min/Max Current Rewards: {}/{}'
                    .format(np.mean(end_rewards[-10:]), reward_sum.min(),
                            reward_sum.max()))

    except KeyboardInterrupt:
        pass

    torch.save(
        update_algo.policy.state_dict(),
        '../model_weights/{}_{}_conv.pth'.format(args.env_name, args.algo))

    import pandas as pd

    out_dict = {'avg_end_rewards': end_rewards}
    out_log = pd.DataFrame(out_dict)
    out_log.to_csv('../logs/{}_{}_rewards.csv'.format(args.env_name,
                                                      args.algo),
                   index=False)

    out_dict = {
        'actor losses': update_algo.actor_losses,
        'critic losses': update_algo.critic_losses,
        'entropy': update_algo.entropy_logs
    }
    out_log = pd.DataFrame(out_dict)
    out_log.to_csv('../logs/{}_{}_training_behavior.csv'.format(
        args.env_name, args.algo),
                   index=False)

    plt.plot(end_rewards)
    plt.show()
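
Example #16 references a train_state_proc.proc_state(frame, mask) helper that is not shown. Given the (4, 105, 80) state size passed to the algorithms, it most likely grayscales, downsamples, and stacks the last four frames, zeroing the stack of any environment that just finished. A hypothetical stand-in, assuming observations arrive as a [num_envs, 210, 160, 3] uint8 array:

import numpy as np
import torch

class FrameStacker:
    """Hypothetical stand-in for train_state_proc used in Example #16."""

    def __init__(self, num_envs: int, stack: int = 4, height: int = 105, width: int = 80):
        self.frames = torch.zeros(num_envs, stack, height, width)

    def proc_state(self, frame: np.ndarray, mask: torch.Tensor) -> torch.Tensor:
        gray = torch.from_numpy(frame).float().mean(dim=-1) / 255.0  # [num_envs, 210, 160]
        small = gray[:, ::2, ::2]                                    # [num_envs, 105, 80]
        self.frames = self.frames * mask.view(-1, 1, 1, 1)           # reset finished envs
        self.frames = torch.cat((self.frames[:, 1:], small.unsqueeze(1)), dim=1)
        return self.frames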