Example #1
 def _get_transition_value(
     self,
     memory: D.T_memory[D.T_state],
     action: D.T_agent[D.T_concurrency[D.T_event]],
     next_state: Optional[D.T_state] = None,
 ) -> D.T_agent[Value[D.T_value]]:
     if memory == self._goal:
         return Value(cost=0)
     else:
         return Value(cost=1)
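A minimal standalone sketch of the same goal-conditioned cost pattern, assuming scikit-decide is installed (Value is imported from skdecide); the GoalCostDemo class and the tuple states below are illustrative stand-ins, not part of the example above:

 from skdecide import Value

 class GoalCostDemo:
     """Toy stand-in for a domain that stores a single goal state."""

     def __init__(self, goal):
         self._goal = goal

     def transition_cost(self, state):
         # Zero cost once the goal is reached, unit cost for every other step.
         return Value(cost=0) if state == self._goal else Value(cost=1)

 demo = GoalCostDemo(goal=(3, 3))
 assert demo.transition_cost((0, 0)).cost == 1
 assert demo.transition_cost((3, 3)).cost == 0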
Example #2
 def __init__(
     self,
     domain_factory: Callable[[], Domain],
     heuristic: Optional[Callable[[Domain, D.T_state],
                                  D.T_agent[Value[D.T_value]]]] = None,
     discount: float = 1.0,
     max_tip_expansions: int = 1,
     parallel: bool = False,
     shared_memory_proxy=None,
     detect_cycles: bool = False,
     debug_logs: bool = False,
 ) -> None:
     ParallelSolver.__init__(
         self,
         domain_factory=domain_factory,
         parallel=parallel,
         shared_memory_proxy=shared_memory_proxy,
     )
     self._solver = None
     self._discount = discount
      self._max_tip_expansions = max_tip_expansions
     self._detect_cycles = detect_cycles
     self._debug_logs = debug_logs
     if heuristic is None:
         self._heuristic = lambda d, s: Value(cost=0)
     else:
         self._heuristic = heuristic
     self._lambdas = [self._heuristic]
     self._ipc_notify = True
Example #3
 def __init__(
     self,
     domain_factory: Callable[[], Domain],
     state_features: Callable[[Domain, D.T_state], Any],
     heuristic: Callable[[Domain, D.T_state],
                         D.T_agent[Value[D.T_value]]],
     termination_checker: Callable[[Domain, D.T_state],
                                   D.T_agent[D.T_predicate]],
     parallel: bool = False,
     shared_memory_proxy=None,
     debug_logs: bool = False,
 ) -> None:
     ParallelSolver.__init__(
         self,
         domain_factory=domain_factory,
         parallel=parallel,
         shared_memory_proxy=shared_memory_proxy,
     )
     self._solver = None
     self._domain = None
     self._state_features = state_features
     self._termination_checker = termination_checker
     self._debug_logs = debug_logs
     if heuristic is None:
         self._heuristic = lambda d, s: Value(cost=0)
     else:
         self._heuristic = heuristic
     self._lambdas = [
         self._state_features,
         self._heuristic,
         self._termination_checker,
     ]
     self._ipc_notify = True
Example #4
 def __init__(
     self,
     domain_factory: Callable[[], Domain] = None,
     heuristic: Optional[Callable[[Domain, D.T_state],
                                  D.T_agent[Value[D.T_value]]]] = None,
     use_labels: bool = True,
     time_budget: int = 3600000,
     rollout_budget: int = 100000,
     max_depth: int = 1000,
     epsilon_moving_average_window: int = 100,
     epsilon: float = 0.001,
     discount: float = 1.0,
     online_node_garbage: bool = False,
     continuous_planning: bool = True,
     parallel: bool = False,
     shared_memory_proxy=None,
     debug_logs: bool = False,
     watchdog: Callable[[int, int, float, float], bool] = None,
 ) -> None:
     ParallelSolver.__init__(
         self,
         domain_factory=domain_factory,
         parallel=parallel,
         shared_memory_proxy=shared_memory_proxy,
     )
     self._solver = None
     if heuristic is None:
         self._heuristic = lambda d, s: Value(cost=0)
     else:
         self._heuristic = heuristic
     self._lambdas = [self._heuristic]
     self._use_labels = use_labels
     self._time_budget = time_budget
     self._rollout_budget = rollout_budget
     self._max_depth = max_depth
     self._epsilon_moving_average_window = epsilon_moving_average_window
     self._epsilon = epsilon
     self._discount = discount
     self._online_node_garbage = online_node_garbage
     self._continuous_planning = continuous_planning
     self._debug_logs = debug_logs
     if watchdog is None:
         self._watchdog = (lambda elapsed_time, number_rollouts,
                           best_value, epsilon_moving_average: True)
     else:
         self._watchdog = watchdog
     self._ipc_notify = True
Example #5
 def __init__(
     self,
     domain_factory: Callable[[], Domain] = None,
     heuristic: Optional[Callable[[Domain, D.T_state],
                                  D.T_agent[Value[D.T_value]]]] = None,
     parallel: bool = False,
     shared_memory_proxy=None,
     debug_logs: bool = False,
 ) -> None:
     ParallelSolver.__init__(
         self,
         domain_factory=domain_factory,
         parallel=parallel,
         shared_memory_proxy=shared_memory_proxy,
     )
     self._solver = None
     self._debug_logs = debug_logs
     if heuristic is None:
         self._heuristic = lambda d, s: Value(cost=0)
     else:
         self._heuristic = heuristic
     self._lambdas = [self._heuristic]
     self._ipc_notify = True
Example #6
 def __init__(
     self,
     domain_factory: Callable[[], Domain] = None,
     heuristic: Optional[
         Callable[
             [Domain, D.T_state],
             Tuple[
                 D.T_agent[Value[D.T_value]],
                 D.T_agent[D.T_concurrency[D.T_event]],
             ],
         ]
     ] = None,
     time_budget: int = 3600000,
     rollout_budget: int = 100000,
     max_depth: int = 1000,
     max_feasibility_trials: int = 0,  # if 0, the number of agents is used
     graph_expansion_rate: float = 0.1,
     epsilon_moving_average_window: int = 100,
     epsilon: float = 0.0,  # not a stopping criterion by default
     discount: float = 1.0,
     action_choice_noise: float = 0.1,
     dead_end_cost: float = 10000,
     online_node_garbage: bool = False,
     continuous_planning: bool = True,
     parallel: bool = False,
     shared_memory_proxy=None,
     debug_logs: bool = False,
     watchdog: Callable[[int, int, float, float], bool] = None,
 ) -> None:
     ParallelSolver.__init__(
         self,
         domain_factory=domain_factory,
         parallel=parallel,
         shared_memory_proxy=shared_memory_proxy,
     )
     self._solver = None
     if heuristic is None:
         self._heuristic = lambda d, s: (
             {a: Value(cost=0) for a in s},
             {a: None for a in s},
         )
     else:
         self._heuristic = heuristic
     self._lambdas = [self._heuristic]
     self._time_budget = time_budget
     self._rollout_budget = rollout_budget
     self._max_depth = max_depth
     self._max_feasibility_trials = max_feasibility_trials
     self._graph_expansion_rate = graph_expansion_rate
     self._epsilon_moving_average_window = epsilon_moving_average_window
     self._epsilon = epsilon
     self._discount = discount
     self._action_choice_noise = action_choice_noise
     self._dead_end_cost = dead_end_cost
     self._online_node_garbage = online_node_garbage
     self._continuous_planning = continuous_planning
     self._debug_logs = debug_logs
     if watchdog is None:
         self._watchdog = (
             lambda elapsed_time, number_rollouts, best_value, epsilon_moving_average: True
         )
     else:
         self._watchdog = watchdog
     self._ipc_notify = True
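The multi-agent default heuristic above returns, per agent, a zero-cost estimate together with an empty action recommendation. A minimal sketch of that shape, assuming Value from skdecide and a hypothetical per-agent observation dict:

 from skdecide import Value

 # Default heuristic shape: (per-agent value dict, per-agent action dict).
 default_heuristic = lambda d, s: (
     {a: Value(cost=0) for a in s},
     {a: None for a in s},
 )

 observation = {"agent_0": (0, 0), "agent_1": (5, 5)}  # hypothetical states
 values, actions = default_heuristic(None, observation)
 assert values["agent_0"].cost == 0 and actions["agent_1"] is None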
Example #7
    def _state_sample(
        self,
        memory: D.T_memory[D.T_state],
        action: D.T_agent[D.T_concurrency[D.T_event]],
    ) -> TransitionOutcome[
        D.T_state,
        D.T_agent[Value[D.T_value]],
        D.T_agent[D.T_predicate],
        D.T_agent[D.T_info],
    ]:
        next_state = {}
        transition_value = {}
        occupied_cells = {}
        dead_end = {a: False for a in memory}

        for agent, state in memory.items():
            if state == self._agents_goals[agent]:
                next_state[agent] = state
                transition_value[agent] = Value(cost=0)
                continue
            if action[agent] == AgentAction.stay:
                # must test after goal check for proper cost setting
                next_state[agent] = state
                transition_value[agent] = Value(cost=1)
                continue
            next_state_1 = next_state_2 = next_state_3 = state
            if action[agent] == AgentAction.left:
                if state.x > 0 and self._maze[state.y][state.x - 1] == 1:
                    next_state_1 = AgentState(x=state.x - 1, y=state.y)
                    if state.y > 0 and self._maze[state.y - 1][state.x - 1] == 1:
                        next_state_2 = AgentState(x=state.x - 1, y=state.y - 1)
                    if (state.y < self._num_rows - 1
                            and self._maze[state.y + 1][state.x - 1] == 1):
                        next_state_3 = AgentState(x=state.x - 1, y=state.y + 1)
            elif action[agent] == AgentAction.right:
                if (state.x < self._num_cols - 1
                        and self._maze[state.y][state.x + 1] == 1):
                    next_state_1 = AgentState(x=state.x + 1, y=state.y)
                    if state.y > 0 and self._maze[state.y - 1][state.x + 1] == 1:
                        next_state_2 = AgentState(x=state.x + 1, y=state.y - 1)
                    if (state.y < self._num_rows - 1
                            and self._maze[state.y + 1][state.x + 1] == 1):
                        next_state_3 = AgentState(x=state.x + 1, y=state.y + 1)
            elif action[agent] == AgentAction.up:
                if state.y > 0 and self._maze[state.y - 1][state.x] == 1:
                    next_state_1 = AgentState(x=state.x, y=state.y - 1)
                    if state.x > 0 and self._maze[state.y - 1][state.x - 1] == 1:
                        next_state_2 = AgentState(x=state.x - 1, y=state.y - 1)
                    if (state.x < self._num_cols - 1
                            and self._maze[state.y - 1][state.x + 1] == 1):
                        next_state_3 = AgentState(x=state.x + 1, y=state.y - 1)
            elif action[agent] == AgentAction.down:
                if (state.y < self._num_rows - 1
                        and self._maze[state.y + 1][state.x] == 1):
                    next_state_1 = AgentState(x=state.x, y=state.y + 1)
                    if state.x > 0 and self._maze[state.y + 1][state.x - 1] == 1:
                        next_state_2 = AgentState(x=state.x - 1, y=state.y + 1)
                    if (state.x < self._num_cols - 1
                            and self._maze[state.y + 1][state.x + 1] == 1):
                        next_state_3 = AgentState(x=state.x + 1, y=state.y + 1)
            # Intended move with probability 0.8; the two lateral slip
            # candidates (next_state_2, next_state_3) are drawn with 0.1 each.
            next_state[agent] = rd.choices(
                [next_state_1, next_state_2, next_state_3], [0.8, 0.1, 0.1], k=1
            )[0]
            transition_value[agent] = Value(cost=1)
            if tuple(next_state[agent]) in occupied_cells:
                dead_end[agent] = True
                dead_end[occupied_cells[tuple(next_state[agent])]] = True
                transition_value[agent] = Value(cost=1000)  # for random walk
            else:
                occupied_cells[tuple(next_state[agent])] = agent
        return TransitionOutcome(
            state=HashableDict(next_state),
            value=transition_value if not self._flatten_data else Value(
                cost=sum(v.cost for a, v in transition_value.items())),
            termination=dead_end if not self._flatten_data else all(
                t for a, t in dead_end.items()),
            info=None,
        )
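The 0.8/0.1/0.1 slip model used above can be checked in isolation; this sketch relies only on random.choices from the standard library, with made-up grid cells and no scikit-decide types:

 import random as rd
 from collections import Counter

 def sample_next_cell(intended, slip_a, slip_b):
     """Intended cell with probability 0.8, each lateral slip cell with 0.1."""
     return rd.choices([intended, slip_a, slip_b], weights=[0.8, 0.1, 0.1], k=1)[0]

 counts = Counter(sample_next_cell((1, 0), (1, 1), (1, -1)) for _ in range(10_000))
 print(counts)  # roughly 8000 / 1000 / 1000 draws respectively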
Example #8
     lambda etime, nbr, bval, ema: martdp_watchdog(etime, nbr, bval, ema),
     "online_node_garbage": True,
     "continuous_planning": False,
     "debug_logs": False,
 },
 "singleagent_solver_kwargs": {
     "domain_factory": lambda: lambda multiagent_domain, agent: SingleAgentMaze(
         multiagent_domain._maze, multiagent_domain._agents_goals[agent]
     ),
     "heuristic": lambda d, s: Value(
         cost=sqrt((d._goal.x - s.x) ** 2 + (d._goal.y - s.y) ** 2)
     ),
     "use_labels": True,
     "time_budget": 1000,
     "max_depth": 100,
     "continuous_planning": False,
     "online_node_garbage": False,
     "parallel": False,
     "debug_logs": False,
 },
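The nested lambda bound to "domain_factory" above is a factory of per-agent factories: the outer call yields a builder which, given the multi-agent domain and one agent, constructs that agent's single-agent view. A toy illustration with placeholder dictionaries (not the real SingleAgentMaze signature):

 # Outer lambda: called once by the solver to obtain the builder.
 # Inner lambda: called per agent to build its single-agent domain.
 make_builder = lambda: (
     lambda multiagent_domain, agent: {
         "maze": multiagent_domain["maze"],
         "goal": multiagent_domain["goals"][agent],
     }
 )

 world = {"maze": [[1, 1], [1, 1]], "goals": {"agent_0": (1, 1)}}
 builder = make_builder()
 assert builder(world, "agent_0")["goal"] == (1, 1)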
Example #9
 def _multiagent_heuristic(
     self, observation: D.T_agent[D.T_observation]
 ) -> Tuple[D.T_agent[Value[D.T_value]], D.T_agent[D.T_concurrency[D.T_event]]]:
     h = {}
     for a, s in self._singleagent_solvers.items():
         if observation[a] not in self._singleagent_solutions[a]:
             undefined_solution = False
             s.solve_from(observation[a])
             if hasattr(self._singleagent_solvers[a], "get_policy"):
                 p = self._singleagent_solvers[a].get_policy()
                 for ps, pav in p.items():
                     self._singleagent_solutions[a][ps] = pav[::-1]
                 undefined_solution = (
                     observation[a] not in self._singleagent_solutions[a]
                 )
             else:
                 if not s.is_solution_defined_for(observation[a]):
                     undefined_solution = True
                 else:
                     self._singleagent_solutions[a][observation[a]] = (
                         s.get_utility(observation[a]),
                         s.get_next_action(observation[a]),
                     )
             if undefined_solution:
                 is_terminal = (
                     hasattr(self._get_singleagent_domain(a), "is_goal")
                     and self._get_singleagent_domain(a).is_goal(observation[a])
                 ) or (
                     hasattr(self._get_singleagent_domain(a), "is_terminal")
                     and self._get_singleagent_domain(a).is_terminal(observation[a])
                 )
                 if not is_terminal:
                     print(
                         "\x1b[3;33;40m"
                         + "/!\\ Solution not defined for agent {} in non-terminal state {}".format(
                             a, observation[a]
                         )
                         + ": Assigning default action! (is it a terminal state without no-op action?)"
                         + "\x1b[0m"
                     )
                 try:
                     self._singleagent_solutions[a][observation[a]] = (
                         0,
                         self._get_singleagent_domain(a)
                         .get_applicable_actions(observation[a])
                         .sample(),
                     )
                 except Exception as err:
                     terminal_str = "terminal " if is_terminal else ""
                     raise RuntimeError(
                         "Cannot sample applicable action "
                         "for agent {} in {}state {} "
                         "(original exception is: {})".format(
                             a, terminal_str, observation[a], err
                         )
                     )
     if issubclass(self._multiagent_solver.T_domain, SingleAgent):
         h = (
             Value(
                 cost=sum(
                     p[observation[a]][0]
                     for a, p in self._singleagent_solutions.items()
                 )
             ),
             {
                 a: p[observation[a]][1]
                 for a, p in self._singleagent_solutions.items()
             },
         )
     else:
         h = (
             {
                 a: Value(cost=p[observation[a]][0])
                 for a, p in self._singleagent_solutions.items()
             },
             {
                 a: p[observation[a]][1]
                 for a, p in self._singleagent_solutions.items()
             },
         )
     return h
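The final aggregation in this method can be illustrated on invented per-agent solutions: when the multi-agent solver works over a single-agent domain type, the per-agent utilities are summed into one joint cost while the recommended actions stay per-agent. A sketch assuming Value from skdecide:

 from skdecide import Value

 # Invented per-agent solutions mapping state -> (utility, best action).
 solutions = {
     "agent_0": {"s0": (3.0, "right")},
     "agent_1": {"s1": (5.0, "up")},
 }
 observation = {"agent_0": "s0", "agent_1": "s1"}

 joint_value = Value(cost=sum(sol[observation[a]][0] for a, sol in solutions.items()))
 joint_actions = {a: sol[observation[a]][1] for a, sol in solutions.items()}
 assert joint_value.cost == 8.0 and joint_actions["agent_0"] == "right"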