def get_actions(self, state_repr):
    """Returns list of possible actions from current state."""
    if self.subtask is None:
        return [(0, 0)]

    # Convert repr into an environment object.
    state = self.repr_to_env_dict[state_repr]
    subtask_agents = self.get_subtask_agents(env_state=state)
    output_actions = []

    # Return single-agent actions.
    if not self.is_joint:
        agent = subtask_agents[0]
        output_actions = nav_utils.get_single_actions(env=state, agent=agent)
    # Return joint-agent actions.
    else:
        agent_1, agent_2 = subtask_agents
        valid_actions = list(product(
            nav_utils.get_single_actions(env=state, agent=agent_1),
            nav_utils.get_single_actions(env=state, agent=agent_2)))
        # Only consider a joint action valid if the agents do not collide.
        for va in valid_actions:
            action_1, action_2 = va
            execute = state.is_collision(
                agent1_loc=agent_1.location,
                agent2_loc=agent_2.location,
                agent1_action=action_1,
                agent2_action=action_2)
            # is_collision returns per-agent executability flags; keep the
            # joint action only if both agents can execute their part.
            if all(execute):
                output_actions.append(va)
    return output_actions
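# A minimal, self-contained sketch of the joint-action filtering pattern used
# in the joint branch above: enumerate the Cartesian product of each agent's
# single-agent actions and keep only the pairs that pass a collision check.
# The `can_execute` predicate below is a hypothetical stand-in for the
# environment's `is_collision` call, not part of the codebase.
#
#     from itertools import product
#
#     def joint_actions(actions_1, actions_2, can_execute):
#         """Return all action pairs for which the collision check passes."""
#         return [(a1, a2) for a1, a2 in product(actions_1, actions_2)
#                 if can_execute(a1, a2)]
#
#     # Example: forbid the two agents from stepping into each other.
#     moves = [(0, 0), (0, 1), (0, -1), (1, 0), (-1, 0)]
#     pairs = joint_actions(
#         moves, moves,
#         can_execute=lambda a1, a2: not (a1 == (1, 0) and a2 == (-1, 0)))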
def plan(self, env, initializing_priors=False):
    """Plan next action---relevant for navigation planner."""
    print('right before planning, {} had old subtask {}, new subtask {}, subtask complete {}'.format(
        self.name, self.subtask, self.new_subtask, self.subtask_complete))

    # Check whether this subtask is done.
    if self.new_subtask is not None:
        self.def_subtask_completion(env=env)

    # If subtask is None, then do nothing.
    if (self.new_subtask is None) or (not self.new_subtask_agent_names):
        actions = nav_utils.get_single_actions(env=env, agent=self)
        probs = []
        for a in actions:
            if a == (0, 0):
                probs.append(self.none_action_prob)
            else:
                probs.append((1.0 - self.none_action_prob) / (len(actions) - 1))
        self.action = actions[np.random.choice(len(actions), p=probs)]
    # Otherwise, plan accordingly.
    else:
        if self.model_type == 'greedy' or initializing_priors:
            other_agent_planners = {}
        else:
            # Determine other agent planners for level 1 planning.
            # Other agent planners are based on your planner---agents never
            # share planners.
            backup_subtask = self.new_subtask if self.new_subtask is not None else self.subtask
            other_agent_planners = self.delegator.get_other_agent_planners(
                obs=copy.copy(env), backup_subtask=backup_subtask)

        print("[ {} Planning ] Task: {}, Task Agents: {}".format(
            self.name, self.new_subtask, self.new_subtask_agent_names))

        action = self.planner.get_next_action(
            env=env,
            subtask=self.new_subtask,
            subtask_agent_names=self.new_subtask_agent_names,
            other_agent_planners=other_agent_planners)

        # If joint subtask, pick your part of the simulated joint plan.
        if self.name not in self.new_subtask_agent_names and self.planner.is_joint:
            self.action = action[0]
        else:
            self.action = (action[self.new_subtask_agent_names.index(self.name)]
                           if self.planner.is_joint else action)

    # Update subtask.
    self.subtask = self.new_subtask
    self.subtask_agent_names = self.new_subtask_agent_names
    self.new_subtask = None
    self.new_subtask_agent_names = []

    print('{} proposed action: {}\n'.format(self.name, self.action))
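# A minimal numpy sketch of the no-subtask branch above: the stay-in-place
# action (0, 0) gets probability `none_action_prob` and the remaining mass is
# spread uniformly over the other available actions. The action list and the
# 0.5 value are illustrative assumptions, not taken from the config.
#
#     import numpy as np
#
#     actions = [(0, 0), (0, 1), (0, -1), (1, 0), (-1, 0)]
#     none_action_prob = 0.5  # assumed value for illustration
#     probs = [none_action_prob if a == (0, 0)
#              else (1.0 - none_action_prob) / (len(actions) - 1)
#              for a in actions]
#     action = actions[np.random.choice(len(actions), p=probs)]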
def prob_nav_actions(self, obs_tm1, actions_tm1, subtask,
                     subtask_agent_names, beta, no_level_1):
    """Return probability that subtask_agents performed subtask, given
    previous observations (obs_tm1) and actions (actions_tm1).

    Args:
        obs_tm1: Copy of environment object. Represents environment at t-1.
        actions_tm1: Dictionary of agent actions. Maps agent str names to
            tuple actions.
        subtask: Subtask object to perform inference for.
        subtask_agent_names: Tuple of agent str names, of agents who perform
            subtask. subtask and subtask_agent_names make up a subtask
            allocation.
        beta: Beta float value for softmax function.
        no_level_1: Bool, whether to turn off level-k planning.
    Returns:
        A float probability update of whether agents in subtask_agent_names
        are performing subtask.
    """
    print("[BayesianDelegation.prob_nav_actions] Calculating probs for subtask {} by {}".format(
        str(subtask), ' & '.join(subtask_agent_names)))
    assert len(subtask_agent_names) == 1 or len(subtask_agent_names) == 2

    # Perform inference over None subtasks.
    if subtask is None:
        assert len(subtask_agent_names) != 2, "Two agents are doing None."
        sim_agent = list(filter(lambda a: a.name == self.agent_name,
                                obs_tm1.sim_agents))[0]
        # Get the number of possible actions at obs_tm1 available to the
        # agent, excluding the stay-in-place action (0, 0).
        num_actions = len(get_single_actions(env=obs_tm1, agent=sim_agent)) - 1
        action_prob = (1.0 - self.none_action_prob) / num_actions
        diffs = [self.none_action_prob] + [action_prob] * num_actions
        softmax_diffs = sp.special.softmax(beta * np.asarray(diffs))
        # Probability the agent did nothing for the None subtask.
        if actions_tm1[subtask_agent_names[0]] == (0, 0):
            return softmax_diffs[0]
        # Probability the agent moved for the None subtask.
        else:
            return softmax_diffs[1]

    # Perform inference over all non-None subtasks.
    # Calculate Q_{subtask}(obs_tm1, action) for all actions.
    action = tuple([actions_tm1[a_name] for a_name in subtask_agent_names])
    if len(subtask_agent_names) == 1:
        action = action[0]
    state, other_planners = self.get_appropriate_state_and_other_agent_planners(
        obs_tm1=obs_tm1, backup_subtask=subtask, no_level_1=no_level_1)
    self.planner.set_settings(env=obs_tm1,
                              subtask=subtask,
                              subtask_agent_names=subtask_agent_names,
                              other_agent_planners=other_planners)
    old_q = self.planner.Q(state=state, action=action, value_f=self.planner.v_l)

    # Collect actions the agents could have taken in obs_tm1.
    valid_nav_actions = self.planner.get_actions(state_repr=obs_tm1.get_repr())

    # Check that the action taken is in the list of actions available to the
    # agents in obs_tm1.
    assert action in valid_nav_actions, "valid_nav_actions: {}\nlocs: {}\naction: {}".format(
        valid_nav_actions,
        list(map(lambda a: a.location, state.sim_agents)),
        action)

    # If the subtask allocation is joint, then only keep joint actions that
    # match the other agent's action at t-1.
    if len(subtask_agent_names) == 2 and self.agent_name in subtask_agent_names:
        other_index = 1 - subtask_agent_names.index(self.agent_name)
        valid_nav_actions = list(
            filter(lambda x: x[other_index] == action[other_index],
                   valid_nav_actions))

    # Calculate the softmax over Q_{subtask} differences for each action.
    qdiffs = [
        old_q - self.planner.Q(state=state, action=nav_action,
                               value_f=self.planner.v_l)
        for nav_action in valid_nav_actions
    ]
    softmax_diffs = sp.special.softmax(beta * np.asarray(qdiffs))

    # Take the softmax value of the action actually taken.
    return softmax_diffs[valid_nav_actions.index(action)]
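# A minimal numpy/scipy sketch of the likelihood computed in the non-None
# branch above: each candidate action a is scored by
# softmax(beta * (Q(s, a_taken) - Q(s, a))) and the entry for the action
# actually taken is returned. The Q-values below are made up and treated as a
# cost-to-go (lower is better), which is an assumption about the navigation
# planner's value function; under that convention the taken action is most
# likely when it is among the cheapest.
#
#     import numpy as np
#     import scipy as sp
#     import scipy.special
#
#     q_values = {(0, 1): 3.0, (1, 0): 5.0, (0, 0): 6.0}  # assumed Q(s, a)
#     taken = (0, 1)
#     beta = 1.3
#     candidates = list(q_values)
#     qdiffs = [q_values[taken] - q_values[a] for a in candidates]
#     softmax_diffs = sp.special.softmax(beta * np.asarray(qdiffs))
#     likelihood = softmax_diffs[candidates.index(taken)]  # ~0.91 here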