Example #1
 def pick_action_and_get_critic_values(self,
                                       policy,
                                       state,
                                       epsilon_exploration=None):
     """Picks an action using the policy"""
     # state = torch.from_numpy(state).float().unsqueeze(0)
     state = torch.from_numpy(state).float()
     model_output = policy.forward(state)
     # the first action_size columns are the actor output used to pick the action;
     # the last column is the critic's state-value
     actor_output = model_output[:, :self.action_size]
     critic_output = model_output[:, -1]
     action_distribution = create_actor_distribution(
         self.action_types, actor_output, self.action_size)
     action = action_distribution.sample().cpu().numpy()
     if self.action_types == "CONTINUOUS":
         action += self.noise.sample()  # add exploration noise to the sampled continuous action
     if self.action_types == "DISCRETE":
         if random.random() <= epsilon_exploration:
             action = random.randint(0, self.action_size - 1)
         else:
             action = action[0]
     action_log_prob = self.calculate_log_action_probability(
         action, action_distribution)
     return action, action_log_prob, critic_output
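
The column slicing in this example assumes the policy network packs the actor output and the critic's state-value into a single tensor with action_size + 1 columns. A minimal sketch of such a shared actor-critic network for a discrete action space (the class name and layer sizes are hypothetical, not taken from the examples):

import torch.nn as nn

class SharedActorCritic(nn.Module):
    """Hypothetical network whose forward pass returns one tensor per state:
    columns 0..action_size-1 are the actor output, the last column is the
    critic's state-value estimate, matching the slicing in Example #1."""

    def __init__(self, state_size, action_size, hidden_size=64):
        super().__init__()
        self.body = nn.Sequential(nn.Linear(state_size, hidden_size), nn.ReLU())
        self.head = nn.Linear(hidden_size, action_size + 1)

    def forward(self, state):
        return self.head(self.body(state))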
Example #2
 def calculate_log_probability_of_actions(self, policy, states, actions):
     """Calculates the log probability of an action occuring given a policy and starting state"""
     policy_output = policy.forward(states).to(self.device)
     policy_distribution = create_actor_distribution(
         self.action_types, policy_output, self.action_size)
     policy_distribution_log_prob = policy_distribution.log_prob(actions)
     return policy_distribution_log_prob
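
The log probabilities computed here are typically what a policy-gradient update differentiates through. A minimal REINFORCE-style sketch of that use (the function name and the plain discounted-return weighting are assumptions, not taken from the surrounding code):

def policy_gradient_loss(action_log_probs, discounted_returns):
    """Sketch only: standard REINFORCE objective, negated so it can be minimised.
    action_log_probs and discounted_returns are 1-D tensors of equal length."""
    return -(action_log_probs * discounted_returns).mean()

# usage: loss = policy_gradient_loss(log_probs, returns); loss.backward(); optimizer.step()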
Example #3
 def pick_action(self, policy, state):
     """Picks an action using the policy"""
     state = torch.from_numpy(state).float().unsqueeze(0)
     actor_output = policy.forward(state)
     action_distribution = create_actor_distribution(
         self.action_types, actor_output, self.action_size)
     action = action_distribution.sample().cpu().numpy()
     if self.action_types == "CONTINUOUS":
         action += self.noise.sample()
     return action
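
For the CONTINUOUS branch, self.noise is assumed to be an exploration-noise process whose sample() returns an array of shape (action_size,). A minimal Gaussian stand-in under that assumption (the agent may well use an Ornstein-Uhlenbeck process instead):

import numpy as np

class GaussianExplorationNoise:
    """Hypothetical zero-mean Gaussian exploration noise for continuous actions."""

    def __init__(self, action_size, sigma=0.1, seed=0):
        self.action_size = action_size
        self.sigma = sigma
        self.rng = np.random.default_rng(seed)

    def sample(self):
        return self.sigma * self.rng.standard_normal(self.action_size)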
Example #4
    def pick_action(self, policy, state, epsilon_exploration=None):
        """Picks an action using the policy"""
        if self.action_types == "DISCRETE":
            if random.random() <= epsilon_exploration:
                action = random.randint(0, self.action_size - 1)
                return action

        state = torch.from_numpy(state).float().unsqueeze(0)
        actor_output = policy.forward(state)
        if self.action_choice_output_columns is not None:
            actor_output = actor_output[:, self.action_choice_output_columns]
        action_distribution = create_actor_distribution(
            self.action_types, actor_output, self.action_size)
        action = action_distribution.sample().cpu()

        if self.action_types == "CONTINUOUS":
            action += torch.Tensor(self.noise.sample())
        else:
            action = action.item()
        return action
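
All four examples delegate to create_actor_distribution to turn the raw actor output into a torch.distributions object that can be sampled and queried for log probabilities. Its real implementation is not shown here; a minimal sketch consistent with how it is called in these examples (Categorical for DISCRETE, Normal for CONTINUOUS) could look like the following, where treating the discrete output as logits and the continuous output as means followed by standard deviations are assumptions:

import torch
from torch.distributions import Categorical, Normal

def create_actor_distribution(action_types, actor_output, action_size):
    """Sketch only: builds a sampling distribution from the actor's output."""
    if action_types == "DISCRETE":
        # assume one logit per action in the last dimension
        return Categorical(logits=actor_output)
    # assume action_size means followed by action_size standard deviations
    means = actor_output[:, :action_size]
    stds = torch.abs(actor_output[:, action_size:]) + 1e-6  # keep the scale strictly positive
    return Normal(means, stds)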