Example #1
    def get_actions(self, state_repr):
        """Returns list of possible actions from current state."""
        if self.subtask is None:
            return [(0, 0)]
        # Convert repr into an environment object.
        state = self.repr_to_env_dict[state_repr]

        subtask_agents = self.get_subtask_agents(env_state=state)
        output_actions = []

        # Return single-agent actions.
        if not self.is_joint:
            agent = subtask_agents[0]
            output_actions = nav_utils.get_single_actions(env=state,
                                                          agent=agent)
        # Return joint-agent actions.
        else:
            agent_1, agent_2 = subtask_agents
            valid_actions = list(
                product(nav_utils.get_single_actions(env=state, agent=agent_1),
                        nav_utils.get_single_actions(env=state,
                                                     agent=agent_2)))
            # Only consider a joint action valid if the agents do not collide:
            # is_collision returns one flag per agent indicating whether that
            # agent can execute its action without colliding.
            for action_1, action_2 in valid_actions:
                execute = state.is_collision(agent1_loc=agent_1.location,
                                             agent2_loc=agent_2.location,
                                             agent1_action=action_1,
                                             agent2_action=action_2)
                if all(execute):
                    output_actions.append((action_1, action_2))
        return output_actions
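As a minimal standalone sketch of the joint-action filtering above (the toy is_collision and the action set are assumptions for illustration, not the environment's real API), the same product-then-filter pattern looks like this:

from itertools import product

# Toy single-agent action set: stay put plus the four cardinal moves.
SINGLE_ACTIONS = [(0, 0), (0, 1), (0, -1), (1, 0), (-1, 0)]

def is_collision(agent1_loc, agent2_loc, agent1_action, agent2_action):
    # Toy stand-in: returns one flag per agent saying whether that agent can
    # execute its action; here a pair is blocked only if both agents would
    # land on the same cell.
    next1 = (agent1_loc[0] + agent1_action[0], agent1_loc[1] + agent1_action[1])
    next2 = (agent2_loc[0] + agent2_action[0], agent2_loc[1] + agent2_action[1])
    collide = next1 == next2
    return (not collide, not collide)

agent_1_loc, agent_2_loc = (0, 0), (0, 2)
output_actions = []
for action_1, action_2 in product(SINGLE_ACTIONS, SINGLE_ACTIONS):
    execute = is_collision(agent_1_loc, agent_2_loc, action_1, action_2)
    if all(execute):
        output_actions.append((action_1, action_2))
print(len(output_actions))  # number of collision-free joint actions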
Example #2
    def plan(self, env, initializing_priors=False):
        """Plan next action---relevant for navigation planner."""
        print(
            'right before planning, {} had old subtask {}, new subtask {}, subtask complete {}'
            .format(self.name, self.subtask, self.new_subtask,
                    self.subtask_complete))

        # Check whether this subtask is done.
        if self.new_subtask is not None:
            self.def_subtask_completion(env=env)

        # If subtask is None, sample a single-agent action, weighted heavily
        # toward the (0, 0) no-op.
        if (self.new_subtask is None) or (not self.new_subtask_agent_names):
            actions = nav_utils.get_single_actions(env=env, agent=self)
            probs = []
            for a in actions:
                if a == (0, 0):
                    probs.append(self.none_action_prob)
                else:
                    probs.append(
                        (1.0 - self.none_action_prob) / (len(actions) - 1))
            self.action = actions[np.random.choice(len(actions), p=probs)]
        # Otherwise, plan accordingly.
        else:
            if self.model_type == 'greedy' or initializing_priors:
                other_agent_planners = {}
            else:
                # Determine other agent planners for level 1 planning.
                # Other agent planners are based on your planner---agents never
                # share planners.
                backup_subtask = self.new_subtask if self.new_subtask is not None else self.subtask
                other_agent_planners = self.delegator.get_other_agent_planners(
                    obs=copy.copy(env), backup_subtask=backup_subtask)

            print("[ {} Planning ] Task: {}, Task Agents: {}".format(
                self.name, self.new_subtask, self.new_subtask_agent_names))

            action = self.planner.get_next_action(
                env=env,
                subtask=self.new_subtask,
                subtask_agent_names=self.new_subtask_agent_names,
                other_agent_planners=other_agent_planners)

            # If joint subtask, pick your part of the simulated joint plan.
            if self.name not in self.new_subtask_agent_names and self.planner.is_joint:
                self.action = action[0]
            else:
                self.action = (action[self.new_subtask_agent_names.index(self.name)]
                               if self.planner.is_joint else action)

        # Update subtask.
        self.subtask = self.new_subtask
        self.subtask_agent_names = self.new_subtask_agent_names
        self.new_subtask = None
        self.new_subtask_agent_names = []

        print('{} proposed action: {}\n'.format(self.name, self.action))
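For the None-subtask branch above, here is a small self-contained sketch of how the probability mass is split between the no-op and the other actions (none_action_prob and the action list are illustrative values, not taken from the codebase):

import numpy as np

none_action_prob = 0.5                        # illustrative value
actions = [(0, 0), (0, 1), (0, -1), (1, 0)]   # assume (0, 0) is the no-op

# (0, 0) keeps none_action_prob; every other action shares the rest evenly.
probs = [none_action_prob if a == (0, 0)
         else (1.0 - none_action_prob) / (len(actions) - 1)
         for a in actions]
assert abs(sum(probs) - 1.0) < 1e-9

action = actions[np.random.choice(len(actions), p=probs)]
print(action)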
Example #3
    def prob_nav_actions(self, obs_tm1, actions_tm1, subtask,
                         subtask_agent_names, beta, no_level_1):
        """Return probabability that subtask_agents performed subtask, given
        previous observations (obs_tm1) and actions (actions_tm1).

        Args:
            obs_tm1: Copy of environment object. Represents environment at t-1.
            actions_tm1: Dictionary of agent actions. Maps agent str names to tuple actions.
            subtask: Subtask object to perform inference for.
            subtask_agent_names: Tuple of agent str names, of agents who perform subtask.
                subtask and subtask_agent_names make up subtask allocation.
            beta: Beta float value for softmax function.
            no_level_1: Bool, whether to turn off level-k planning.
        Returns:
            A float probability update of whether agents in subtask_agent_names are
            performing subtask.
        """
        print(
            "[BayesianDelegation.prob_nav_actions] Calculating probs for subtask {} by {}"
            .format(str(subtask), ' & '.join(subtask_agent_names)))
        assert len(subtask_agent_names) == 1 or len(subtask_agent_names) == 2

        # Perform inference over None subtasks.
        if subtask is None:
            assert len(subtask_agent_names) != 2, "Two agents are doing None."
            sim_agent = list(
                filter(lambda a: a.name == self.agent_name,
                       obs_tm1.sim_agents))[0]
            # Get the number of possible actions available to the agent at
            # obs_tm1, excluding the (0, 0) no-op.
            num_actions = len(get_single_actions(env=obs_tm1, agent=sim_agent)) - 1
            action_prob = (1.0 - self.none_action_prob) / num_actions
            diffs = [self.none_action_prob] + [action_prob] * num_actions
            softmax_diffs = sp.special.softmax(beta * np.asarray(diffs))
            # Probability agents did nothing for None subtask.
            if actions_tm1[subtask_agent_names[0]] == (0, 0):
                return softmax_diffs[0]
            # Probability agents moved for None subtask.
            else:
                return softmax_diffs[1]

        # Perform inference over all non-None subtasks.
        # Calculate Q_{subtask}(obs_tm1, action) for all actions.
        action = tuple([actions_tm1[a_name] for a_name in subtask_agent_names])
        if len(subtask_agent_names) == 1:
            action = action[0]
        state, other_planners = self.get_appropriate_state_and_other_agent_planners(
            obs_tm1=obs_tm1, backup_subtask=subtask, no_level_1=no_level_1)
        self.planner.set_settings(env=obs_tm1,
                                  subtask=subtask,
                                  subtask_agent_names=subtask_agent_names,
                                  other_agent_planners=other_planners)
        old_q = self.planner.Q(state=state,
                               action=action,
                               value_f=self.planner.v_l)

        # Collect actions the agents could have taken in obs_tm1.
        valid_nav_actions = self.planner.get_actions(
            state_repr=obs_tm1.get_repr())

        # Check that the action taken is in the list of actions available to the agents in obs_tm1.
        assert action in valid_nav_actions, "valid_nav_actions: {}\nlocs: {}\naction: {}".format(
            valid_nav_actions,
            list(map(lambda a: a.location, state.sim_agents)), action)

        # If the subtask allocation is joint, keep only joint actions whose
        # other-agent component matches the other agent's action at t-1.
        if (len(subtask_agent_names) == 2
                and self.agent_name in subtask_agent_names):
            other_index = 1 - subtask_agent_names.index(self.agent_name)
            valid_nav_actions = list(
                filter(lambda x: x[other_index] == action[other_index],
                       valid_nav_actions))

        # Calculate the softmax over differences in Q_{subtask} relative to the action taken.
        qdiffs = [
            old_q - self.planner.Q(
                state=state, action=nav_action, value_f=self.planner.v_l)
            for nav_action in valid_nav_actions
        ]
        softmax_diffs = sp.special.softmax(beta * np.asarray(qdiffs))
        # Return the softmax probability of the action actually taken.
        return softmax_diffs[valid_nav_actions.index(action)]
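To make the last step concrete, here is a toy, self-contained version of the softmax over Q-value differences; the Q estimates, beta, and action list are made up for illustration and mirror only the arithmetic of the method above:

import numpy as np
from scipy import special

beta = 1.3
valid_nav_actions = [(0, 0), (0, 1), (1, 0)]
q_values = {(0, 0): -4.0, (0, 1): -2.0, (1, 0): -3.0}   # toy Q estimates

action = (0, 1)            # action actually taken at t-1
old_q = q_values[action]

# Q-difference of the taken action against every valid alternative.
qdiffs = [old_q - q_values[a] for a in valid_nav_actions]
softmax_diffs = special.softmax(beta * np.asarray(qdiffs))

# Probability assigned to the action actually taken.
print(softmax_diffs[valid_nav_actions.index(action)])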