# These snippets are methods excerpted from agent classes; they rely on the following imports
import random
import torch

def calculate_log_probability_of_actions(self, policy, states, actions):
    """Calculates the log probability of an action occurring given a policy and starting state"""
    policy_output = policy.forward(states).to(self.device)
    policy_distribution = create_actor_distribution(self.action_types, policy_output, self.action_size)
    policy_distribution_log_prob = policy_distribution.log_prob(actions)
    return policy_distribution_log_prob

def calculate_log_probability_of_actions(self, policy, states, actions):
    """Calculates the log probability of an action occurring given a policy and starting state"""
    policy_output = policy.forward(states).to(self.device)
    policy_distribution = create_actor_distribution("DISCRETE", policy_output,
                                                    self.config.hyperparameters["action_space"])
    policy_distribution_log_prob = policy_distribution.log_prob(actions)
    return policy_distribution_log_prob
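
# Both calculate_log_probability_of_actions variants above defer to create_actor_distribution.
# A minimal sketch of what such a helper might look like, assuming it wraps
# torch.distributions (Categorical for discrete actions, a diagonal Normal for continuous
# ones); the real helper may differ:
from torch.distributions import Categorical, Normal

def create_actor_distribution(action_types, actor_output, action_size):
    """Turns raw policy-network output into a torch distribution we can sample and score."""
    if action_types == "DISCRETE":
        # actor_output is assumed to hold action probabilities; Categorical normalises rows
        return Categorical(probs=actor_output)
    # For "CONTINUOUS", assume the network emits action_size means followed by
    # action_size standard deviations
    means = actor_output[:, :action_size]
    stds = actor_output[:, action_size:]
    return Normal(means, stds.abs() + 1e-6)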

def produce_action_and_action_info(self, state):
    """Given the state, produces an action, the probability of the action, the log probability
    of the action, and the argmax action"""
    action_probabilities = self.actor_local(state)
    max_probability_action = torch.argmax(action_probabilities).unsqueeze(0)
    action_distribution = create_actor_distribution(self.action_types, action_probabilities, self.action_size)
    action = action_distribution.sample().cpu()
    # Guard against log(0): exactly-zero probabilities would otherwise produce -inf entries
    z = (action_probabilities == 0.0).float() * 1e-8
    log_action_probabilities = torch.log(action_probabilities + z)
    return action, (action_probabilities, log_action_probabilities), max_probability_action

def produce_action_and_action_info(self, state):
    """Given the state, produces an action, the probability of the action, the log probability
    of the action, and the argmax action"""
    action_probabilities = self.actor_local(state)
    max_probability_action = torch.argmax(action_probabilities, dim=-1)
    action_distribution = create_actor_distribution(self.action_types, action_probabilities, self.action_size)
    action = action_distribution.sample().cpu()
    # Have to deal with situation of 0.0 probabilities because we can't do log 0
    z = action_probabilities == 0.0
    z = z.float() * 1e-8
    log_action_probabilities = torch.log(action_probabilities + z)
    return action, (action_probabilities, log_action_probabilities), max_probability_action
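
# The second variant above avoids log(0) by adding a tiny epsilon to exactly-zero
# probabilities. An equivalent guard, sketched here as a hypothetical helper (safe_log_probs
# is not part of the original code), is to clamp the probabilities away from zero:
def safe_log_probs(action_probabilities, eps=1e-8):
    """Clamps probabilities to at least eps before taking the log, so no entry becomes -inf."""
    return torch.log(action_probabilities.clamp(min=eps))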

def pick_action(self, state, exploration_epsilon):
    """Picks an action at random with probability exploration_epsilon when the random policy is
    enabled, otherwise by sampling from the current policy"""
    if self.config.hyperparameters['random_policy'] and random.random() <= exploration_epsilon:
        action = random.randint(0, self.config.hyperparameters['action_space'] - 1)
        return action
    state = torch.from_numpy(state).float()
    actor_output = self.policy_new.forward(state)
    action_distribution = create_actor_distribution("DISCRETE", actor_output,
                                                    self.config.hyperparameters['action_space'])
    action = action_distribution.sample().cpu()
    return action.item()
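
# A hypothetical usage sketch for the epsilon-greedy pick_action above ("agent" and "env" are
# illustrative names; env is assumed to follow the classic Gym reset()/step() API):
#
#     state = env.reset()
#     done = False
#     while not done:
#         action = agent.pick_action(state, exploration_epsilon=0.1)
#         state, reward, done, info = env.step(action)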

def pick_action_and_get_critic_values(self, policy, state, epsilon_exploration=None):
    """Picks an action using the policy"""
    state = torch.from_numpy(state).float().unsqueeze(0)
    model_output = policy.forward(state)
    # we only use the first set of columns to decide the action; the last column is the state value
    actor_output = model_output[:, list(range(self.action_size))]
    critic_output = model_output[:, -1]
    action_distribution = create_actor_distribution(self.action_types, actor_output, self.action_size)
    action = action_distribution.sample().cpu().numpy()
    if self.action_types == "CONTINUOUS":
        action += self.noise.sample()
    if self.action_types == "DISCRETE":
        if random.random() <= epsilon_exploration:
            action = random.randint(0, self.action_size - 1)
        else:
            action = action[0]
    action_log_prob = self.calculate_log_action_probability(action, action_distribution)
    return action, action_log_prob, critic_output
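
# pick_action_and_get_critic_values assumes a single network whose output concatenates the
# actor head (the first action_size columns) with a scalar critic value in the last column.
# A minimal sketch of such a network (class name and layer sizes are illustrative assumptions):
import torch.nn as nn
import torch.nn.functional as F

class SharedActorCritic(nn.Module):
    """Sketch of a network that packs actor probabilities and a critic value into one output."""
    def __init__(self, state_size, action_size, hidden_size=64):
        super().__init__()
        self.body = nn.Linear(state_size, hidden_size)
        self.actor_head = nn.Linear(hidden_size, action_size)
        self.critic_head = nn.Linear(hidden_size, 1)

    def forward(self, state):
        hidden = F.relu(self.body(state))
        action_probs = F.softmax(self.actor_head(hidden), dim=-1)
        state_value = self.critic_head(hidden)
        # Columns [0, action_size) carry the actor output; column -1 is the state value,
        # matching the slicing done in pick_action_and_get_critic_values
        return torch.cat([action_probs, state_value], dim=-1)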

def pick_action(self, policy, state, epsilon_exploration=None):
    """Picks an action using the policy"""
    if self.action_types == "DISCRETE":
        if random.random() <= epsilon_exploration:
            action = random.randint(0, self.action_size - 1)
            return action
    state = torch.from_numpy(state).float().unsqueeze(0)
    actor_output = policy.forward(state)
    if self.action_choice_output_columns is not None:
        actor_output = actor_output[:, self.action_choice_output_columns]
    action_distribution = create_actor_distribution(self.action_types, actor_output, self.action_size)
    action = action_distribution.sample().cpu()
    if self.action_types == "CONTINUOUS":
        action += torch.Tensor(self.noise.sample())
    else:
        action = action.item()
    return action
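
# For CONTINUOUS actions both pick_action variants add exploration noise via
# self.noise.sample(). A minimal sketch of an Ornstein-Uhlenbeck noise process commonly used
# for this purpose (the class name and the mu/theta/sigma defaults are illustrative
# assumptions, not necessarily the original implementation):
import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: temporally correlated noise for continuous-action exploration."""
    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        """Resets the internal state to the mean."""
        self.state = np.copy(self.mu)

    def sample(self):
        """Takes one step of the OU process and returns the new noise value."""
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.standard_normal(len(self.state))
        self.state = self.state + dx
        return self.state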