def get_action(self, state: torch.Tensor, online_actor: torch.nn.Module, training: bool = False) -> Action:
    """Returns actions for the given state as per the current policy."""
    def get_actions_():
        # Run the actor in eval mode and without gradients for a deterministic forward pass
        online_actor.eval()
        with torch.no_grad():
            actions_ = online_actor(state)
        online_actor.train()
        return actions_

    if training:
        # Epsilon-greedy policy: with probability epsilon take a random action, otherwise act greedily
        r = np.random.random()
        if r <= self.epsilon:
            action = self.random_action_generator.sample()
        else:
            action = get_actions_().cpu().data.numpy()
            if self.random_action_generator.continuous_actions:
                action = np.clip(
                    action,
                    self.random_action_generator.continuous_action_range[0],
                    self.random_action_generator.continuous_action_range[1],
                )
    else:
        action = get_actions_().cpu().data.numpy()

    return Action(value=action)

def get_action(self, state: torch.Tensor, online_actor: torch.nn.Module) -> Action:
    """Returns actions for the given state as per the current policy."""
    def get_actions_():
        online_actor.eval()
        with torch.no_grad():
            actions_ = online_actor(state)
        online_actor.train()
        return actions_

    if self.epsilon_scheduler:
        # Epsilon-greedy policy
        if self.training:
            r = np.random.random()
            if r <= self.epsilon:
                action = self.random_brain_action_generator.sample()
            else:
                action = get_actions_().cpu().data.numpy()
                if self.random_brain_action_generator.continuous_actions:
                    action = np.clip(
                        action,
                        self.random_brain_action_generator.continuous_action_range[0],
                        self.random_brain_action_generator.continuous_action_range[1],
                    )
        else:
            action = get_actions_().cpu().data.numpy()
    elif self.noise:
        # Noise-based exploration: perturb the deterministic policy output during training
        action = get_actions_().cpu().data.numpy()
        if self.training:
            action += self.noise.sample(action)
            action = np.clip(action, self.action_range[0], self.action_range[1])
    else:
        raise ValueError('Must provide either epsilon_scheduler or noise')
    return Action(value=action)

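# The two epsilon-greedy policies above read `self.epsilon` from an epsilon scheduler that is
# not shown in this section. The following is a minimal, hypothetical sketch of an exponentially
# decaying schedule such a policy could consume; the class name and `get_epsilon` method are
# assumptions for illustration only, not the project's actual API.
class EpsilonScheduler:
    def __init__(self, eps_start: float = 1.0, eps_min: float = 0.01, eps_decay: float = 0.995):
        self.eps_start = eps_start
        self.eps_min = eps_min
        self.eps_decay = eps_decay

    def get_epsilon(self, episode: int) -> float:
        # Decay exponentially per episode, never dropping below eps_min
        return max(self.eps_min, self.eps_start * self.eps_decay ** episode)
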
def get_action(self, state: np.ndarray, model: torch.nn.Module) -> Action:
    """Returns the greedy action (argmax of the model's action values) for the given state."""
    model.eval()
    with torch.no_grad():
        action_values = model.forward(state, act=True)
    model.train()
    # Greedy action: index of the maximum action value
    action = action_values.max(1)[1].cpu().numpy()
    return Action(value=action)

def get_action(self, state: np.ndarray, model: torch.nn.Module) -> Action:
    """Samples an action from the softmax distribution over the model's action values."""
    model.eval()
    with torch.no_grad():
        action_values = model.forward(state, act=True)
    model.train()
    # Turn action values into a probability distribution and sample an action index from it
    probs = torch.nn.functional.softmax(action_values, dim=-1)
    action = np.array([
        np.random.choice(np.arange(0, self.action_size), p=probs.view(-1).numpy())
    ])
    return Action(value=action)

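# For reference, the softmax sampling above reduces to the following standalone pattern:
# normalize a row of action values into probabilities, then draw one action index.
# This is an illustrative sketch with dummy values, not code taken from the agent.
import numpy as np
import torch

action_values = torch.tensor([[1.0, 2.0, 0.5]])             # shape (1, action_size)
probs = torch.nn.functional.softmax(action_values, dim=-1)  # sums to 1 across actions
sampled = np.random.choice(np.arange(probs.shape[-1]), p=probs.view(-1).numpy())
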
def step_agents_fn(brain_set: BrainSet, next_brain_environment: dict, t: int):
    for brain_name, brain_environment in next_brain_environment.items():
        agent = brain_set[brain_name].agents[0]
        for i in range(NUM_AGENTS):
            action = brain_environment['actions'][0].value[i]
            action = action[np.newaxis, ...]
            brain_agent_experience = Experience(
                state=brain_environment['states'][i].unsqueeze(0),
                action=Action(value=action),
                reward=brain_environment['rewards'][i],
                next_state=brain_environment['next_states'][i].unsqueeze(0),
                done=brain_environment['dones'][i],
                t_step=t,
            )
            agent.step(brain_agent_experience)

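# The Experience container used in step_agents_fn is not defined in this section. A minimal
# sketch, assuming it is a simple record of one transition with exactly the fields used at the
# call site above; this NamedTuple is an assumption, not the project's actual class.
from typing import Any, NamedTuple

class Experience(NamedTuple):
    state: Any
    action: Any
    reward: float
    next_state: Any
    done: bool
    t_step: int
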
def get_action(self,
               agent_state: torch.FloatTensor,
               joint_state: torch.FloatTensor,
               joint_action: Optional[torch.FloatTensor] = None,
               action: Optional[torch.FloatTensor] = None,
               *args, **kwargs) -> Action:
    """Returns actions for the given states as per the target policy.

    :param agent_state: States for this agent
    :param joint_state: States for all agents
    :param joint_action: Actions for all agents
    :param action: Action for this agent
    :return: Action containing:
        - action (Tensor): predicted action
        - log_prob (Tensor): log probability of the current action distribution
        - value (Tensor): estimated value function
    """
    other_agent_states = self.get_other_agent_attributes(
        joint_state, self.map_agent_to_state_slice, flatten=False)
    other_agent_actions = self.get_other_agent_attributes(
        joint_action, self.map_agent_to_action_slice, flatten=False
    ) if joint_action is not None else None

    self.target_actor_critic.eval()
    with torch.no_grad():
        actions, log_probs, _, values = self.target_actor_critic(
            agent_state=agent_state,
            other_agent_states=other_agent_states,
            other_agent_actions=other_agent_actions,
            action=action,
            scale=self.std_scale)
        if actions.dim() == 1:
            actions = actions.unsqueeze(0)
        actions = actions.cpu().data.numpy()
    self.target_actor_critic.train()

    if self.continuous_actions and self.continuous_action_range_clip:
        actions = actions.clip(self.continuous_action_range_clip[0],
                               self.continuous_action_range_clip[1])
    return Action(value=actions, log_probs=log_probs, critic_values=values)

def get_action(self, state: np.ndarray, model: torch.nn.Module) -> Action:
    """Returns an epsilon-greedy action: greedy with probability 1 - epsilon,
    otherwise sampled from the softmax distribution over the action values."""
    def _get_action_values():
        model.eval()
        with torch.no_grad():
            action_values = model.forward(state, act=True)
        model.train()
        return action_values

    if self.training:
        action_values_ = _get_action_values()
        if random.random() > self.epsilon:
            # Exploit: pick the action with the highest estimated value
            action = action_values_.max(1)[1].data[0]
        else:
            # Explore: sample an action from the softmax distribution over the values
            probs = torch.nn.functional.softmax(action_values_, dim=-1)
            action = np.random.choice(np.arange(0, self.action_size), p=probs.view(-1).numpy())
    else:
        action_values_ = _get_action_values()
        action = action_values_.max(1)[1].data[0]
    return Action(value=action)

def get_action(self, states, *args, **kwargs) -> Action:
    """Returns actions for the given states as per the target policy.

    :param states: States from the environment
    :return: Action containing:
        - action (Tensor): predicted action
        - log_prob (Tensor): log probability of the current action distribution
        - value (Tensor): estimated value function
    """
    # Use the target_actor_critic to get new actions
    states = states.to(device)
    self.target_actor_critic.eval()
    with torch.no_grad():
        actions, log_probs, _, values = self.target_actor_critic(
            state=states, scale=self.std_scale)
        if actions.dim() == 1:
            actions = actions.unsqueeze(0)
        actions = actions.cpu().data.numpy()
    self.target_actor_critic.train()

    if self.continuous_actions and self.continuous_action_range_clip:
        actions = actions.clip(self.continuous_action_range_clip[0],
                               self.continuous_action_range_clip[1])
    return Action(value=actions, log_probs=log_probs, critic_values=values)

def get_random_action(self, *args) -> Action:
    """Get a random action (used for warmup)."""
    action = self.random_action_generator.sample()
    return Action(value=action)

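# `random_action_generator` is referenced above and in the first two policies but is not defined
# in this section. A minimal sketch, assuming a uniform generator over a continuous action box;
# the class name, constructor arguments, and attributes are assumptions for illustration only.
import numpy as np

class RandomActionGenerator:
    def __init__(self, action_size: int, action_range: tuple = (-1.0, 1.0)):
        self.continuous_actions = True
        self.continuous_action_range = action_range
        self.action_size = action_size

    def sample(self) -> np.ndarray:
        # Draw each action dimension uniformly from the allowed range
        low, high = self.continuous_action_range
        return np.random.uniform(low, high, size=(1, self.action_size))
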
def get_random_action(self, state: torch.Tensor, *args, **kwargs) -> Action:
    """Returns a uniformly random discrete action."""
    # np.random.randint's upper bound is exclusive, so valid actions are 0 .. action_size - 1
    action = np.random.randint(0, self.action_size, (1,))
    return Action(value=action)

def get_random_action(self, *args, **kwargs) -> Action:
    """Returns a uniformly random discrete action."""
    # torch.randint's upper bound is exclusive, so valid actions are 0 .. action_size - 1
    action = torch.randint(0, self.action_size, (1, 1))
    action = action.cpu().data.numpy()
    return Action(value=action)

def get_action(self, state, training=True, *args, **kwargs) -> Action:
    """Returns a uniformly random discrete action regardless of state (random policy)."""
    # torch.randint's upper bound is exclusive; using self.action_size keeps actions in 0 .. action_size - 1
    action = torch.randint(0, self.action_size, (1, 1))
    action = action.cpu().data.numpy()
    return Action(value=action)

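# Every policy above wraps its output in an Action container, whose definition is not part of
# this section. A minimal sketch, assuming a simple record with the three fields that appear in
# the constructor calls above; this dataclass is an assumption, not the project's actual class.
from dataclasses import dataclass
from typing import Any, Optional

@dataclass
class Action:
    value: Any
    log_probs: Optional[Any] = None
    critic_values: Optional[Any] = None
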