def _get_transition_value(
    self,
    memory: D.T_memory[D.T_state],
    action: D.T_agent[D.T_concurrency[D.T_event]],
    next_state: Optional[D.T_state] = None,
) -> D.T_agent[Value[D.T_value]]:
    # Unit-cost model: every transition costs 1 until the goal is reached.
    if memory == self._goal:
        return Value(cost=0)
    else:
        return Value(cost=1)
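# --- Hedged usage sketch (not part of the domain above) ---------------------
# A self-contained illustration of the unit-cost model implemented by
# _get_transition_value. `_State` and `_unit_cost` are local stand-ins for
# illustration only, not the framework's types.
from dataclasses import dataclass

@dataclass(frozen=True)
class _State:
    x: int
    y: int

def _unit_cost(state: _State, goal: _State) -> int:
    # Zero cost once the goal is reached, one otherwise.
    return 0 if state == goal else 1

assert _unit_cost(_State(0, 0), _State(0, 0)) == 0
assert _unit_cost(_State(1, 0), _State(0, 0)) == 1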
def __init__(
    self,
    domain_factory: Callable[[], Domain],
    heuristic: Optional[
        Callable[[Domain, D.T_state], D.T_agent[Value[D.T_value]]]
    ] = None,
    discount: float = 1.0,
    max_tip_expansions: int = 1,
    parallel: bool = False,
    shared_memory_proxy=None,
    detect_cycles: bool = False,
    debug_logs: bool = False,
) -> None:
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._discount = discount
    self._max_tip_expansions = max_tip_expansions
    self._detect_cycles = detect_cycles
    self._debug_logs = debug_logs
    # Fall back to a zero heuristic (always admissible) when none is given.
    if heuristic is None:
        self._heuristic = lambda d, s: Value(cost=0)
    else:
        self._heuristic = heuristic
    self._lambdas = [self._heuristic]
    self._ipc_notify = True
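# --- Hedged usage sketch -----------------------------------------------------
# The constructor above expects `heuristic` to map (domain, state) to a
# Value lower bound on the remaining cost. `_Value`, `_GridState`, and
# `_GridDomain` below are hypothetical stand-ins used only to show the
# callable's shape; a real call would pass the framework's own types.
from dataclasses import dataclass

@dataclass
class _Value:
    cost: float = 0.0

@dataclass(frozen=True)
class _GridState:
    x: int
    y: int

class _GridDomain:
    goal = _GridState(3, 4)

# Manhattan distance is admissible for 4-connected, unit-cost grids.
manhattan = lambda d, s: _Value(cost=abs(d.goal.x - s.x) + abs(d.goal.y - s.y))
assert manhattan(_GridDomain(), _GridState(0, 0)).cost == 7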
def __init__(
    self,
    domain_factory: Callable[[], Domain],
    state_features: Callable[[Domain, D.T_state], Any],
    heuristic: Optional[
        Callable[[Domain, D.T_state], D.T_agent[Value[D.T_value]]]
    ],
    termination_checker: Callable[
        [Domain, D.T_state], D.T_agent[D.T_predicate]
    ],
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
) -> None:
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._domain = None
    self._state_features = state_features
    self._termination_checker = termination_checker
    self._debug_logs = debug_logs
    # Fall back to a zero heuristic when none is given (hence the Optional
    # annotation above).
    if heuristic is None:
        self._heuristic = lambda d, s: Value(cost=0)
    else:
        self._heuristic = heuristic
    self._lambdas = [
        self._state_features,
        self._heuristic,
        self._termination_checker,
    ]
    self._ipc_notify = True
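# --- Hedged usage sketch -----------------------------------------------------
# `state_features` is assumed here to project a state onto a hashable
# feature tuple that the solver can compare across states (e.g. for
# width/novelty-style pruning). The names below are illustrative stand-ins,
# not the framework's types.
from dataclasses import dataclass
from typing import Tuple

@dataclass(frozen=True)
class _MazeState:
    x: int
    y: int

def coordinate_features(domain: object, state: _MazeState) -> Tuple[int, int]:
    # The simplest projection: the raw coordinates themselves.
    return (state.x, state.y)

assert coordinate_features(None, _MazeState(2, 5)) == (2, 5)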
def __init__(
    self,
    domain_factory: Optional[Callable[[], Domain]] = None,
    heuristic: Optional[
        Callable[[Domain, D.T_state], D.T_agent[Value[D.T_value]]]
    ] = None,
    use_labels: bool = True,
    time_budget: int = 3600000,
    rollout_budget: int = 100000,
    max_depth: int = 1000,
    epsilon_moving_average_window: int = 100,
    epsilon: float = 0.001,
    discount: float = 1.0,
    online_node_garbage: bool = False,
    continuous_planning: bool = True,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
    watchdog: Optional[Callable[[int, int, float, float], bool]] = None,
) -> None:
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    # Fall back to a zero heuristic (always admissible) when none is given.
    if heuristic is None:
        self._heuristic = lambda d, s: Value(cost=0)
    else:
        self._heuristic = heuristic
    self._lambdas = [self._heuristic]
    self._use_labels = use_labels
    self._time_budget = time_budget
    self._rollout_budget = rollout_budget
    self._max_depth = max_depth
    self._epsilon_moving_average_window = epsilon_moving_average_window
    self._epsilon = epsilon
    self._discount = discount
    self._online_node_garbage = online_node_garbage
    self._continuous_planning = continuous_planning
    self._debug_logs = debug_logs
    # The default watchdog never interrupts the search.
    if watchdog is None:
        self._watchdog = (
            lambda elapsed_time, number_rollouts, best_value, epsilon_moving_average: True
        )
    else:
        self._watchdog = watchdog
    self._ipc_notify = True
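# --- Hedged usage sketch -----------------------------------------------------
# The watchdog contract assumed above: it receives (elapsed_time,
# number_rollouts, best_value, epsilon_moving_average) and returns True to
# keep searching, False to stop early. The thresholds below are
# illustrative only; the time unit is assumed to match `time_budget`.
def time_capped_watchdog(
    elapsed_time: int,
    number_rollouts: int,
    best_value: float,
    epsilon_moving_average: float,
) -> bool:
    # Stop once the assumed 10-second cap is hit or the epsilon moving
    # average suggests the value estimates have converged.
    return elapsed_time < 10_000 and epsilon_moving_average > 1e-3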
def __init__(
    self,
    domain_factory: Optional[Callable[[], Domain]] = None,
    heuristic: Optional[
        Callable[[Domain, D.T_state], D.T_agent[Value[D.T_value]]]
    ] = None,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
) -> None:
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._debug_logs = debug_logs
    # Fall back to a zero heuristic when none is given.
    if heuristic is None:
        self._heuristic = lambda d, s: Value(cost=0)
    else:
        self._heuristic = heuristic
    self._lambdas = [self._heuristic]
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Optional[Callable[[], Domain]] = None,
    heuristic: Optional[
        Callable[
            [Domain, D.T_state],
            Tuple[
                D.T_agent[Value[D.T_value]],
                D.T_agent[D.T_concurrency[D.T_event]],
            ],
        ]
    ] = None,
    time_budget: int = 3600000,
    rollout_budget: int = 100000,
    max_depth: int = 1000,
    max_feasibility_trials: int = 0,  # defaults to the number of agents when 0
    graph_expansion_rate: float = 0.1,
    epsilon_moving_average_window: int = 100,
    epsilon: float = 0.0,  # not a stopping criterion by default
    discount: float = 1.0,
    action_choice_noise: float = 0.1,
    dead_end_cost: float = 10000,
    online_node_garbage: bool = False,
    continuous_planning: bool = True,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
    watchdog: Optional[Callable[[int, int, float, float], bool]] = None,
) -> None:
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    if heuristic is None:
        # Default: zero cost and no suggested action for every agent in the
        # joint state.
        self._heuristic = lambda d, s: (
            {a: Value(cost=0) for a in s},
            {a: None for a in s},
        )
    else:
        self._heuristic = heuristic
    self._lambdas = [self._heuristic]
    self._time_budget = time_budget
    self._rollout_budget = rollout_budget
    self._max_depth = max_depth
    self._max_feasibility_trials = max_feasibility_trials
    self._graph_expansion_rate = graph_expansion_rate
    self._epsilon_moving_average_window = epsilon_moving_average_window
    self._epsilon = epsilon
    self._discount = discount
    self._action_choice_noise = action_choice_noise
    self._dead_end_cost = dead_end_cost
    self._online_node_garbage = online_node_garbage
    self._continuous_planning = continuous_planning
    self._debug_logs = debug_logs
    # The default watchdog never interrupts the search.
    if watchdog is None:
        self._watchdog = (
            lambda elapsed_time, number_rollouts, best_value, epsilon_moving_average: True
        )
    else:
        self._watchdog = watchdog
    self._ipc_notify = True
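# --- Hedged usage sketch -----------------------------------------------------
# Shape of the multi-agent heuristic assumed above: given a joint state
# mapping agent -> local state, return (per-agent value dict, per-agent
# suggested-action dict), mirroring the default lambda in __init__.
# `_HVal` is a local stand-in for the framework's Value type.
from dataclasses import dataclass

@dataclass
class _HVal:
    cost: float = 0.0

def zero_multiagent_heuristic(domain: object, joint_state: dict):
    return (
        {agent: _HVal(cost=0) for agent in joint_state},
        {agent: None for agent in joint_state},
    )

values, actions = zero_multiagent_heuristic(None, {"a0": (0, 0), "a1": (1, 1)})
assert values["a0"].cost == 0 and actions["a1"] is None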
def _state_sample(
    self,
    memory: D.T_memory[D.T_state],
    action: D.T_agent[D.T_concurrency[D.T_event]],
) -> TransitionOutcome[
    D.T_state,
    D.T_agent[Value[D.T_value]],
    D.T_agent[D.T_predicate],
    D.T_agent[D.T_info],
]:
    next_state = {}
    transition_value = {}
    occupied_cells = {}
    dead_end = {a: False for a in memory}
    for agent, state in memory.items():
        if state == self._agents_goals[agent]:
            # An agent that has reached its goal stays there at no cost.
            next_state[agent] = state
            transition_value[agent] = Value(cost=0)
            continue
        if action[agent] == AgentAction.stay:
            # Must be tested after the goal check so that staying on the
            # goal remains free while staying elsewhere still costs 1.
            next_state[agent] = state
            transition_value[agent] = Value(cost=1)
            continue
        # next_state_1 is the intended cell; next_state_2 and next_state_3
        # are the two lateral slips. Each defaults to the current cell when
        # the corresponding move is blocked by a wall or the maze border.
        next_state_1 = next_state_2 = next_state_3 = state
        if action[agent] == AgentAction.left:
            if state.x > 0 and self._maze[state.y][state.x - 1] == 1:
                next_state_1 = AgentState(x=state.x - 1, y=state.y)
            if state.y > 0 and self._maze[state.y - 1][state.x - 1] == 1:
                next_state_2 = AgentState(x=state.x - 1, y=state.y - 1)
            if (
                state.y < self._num_rows - 1
                and self._maze[state.y + 1][state.x - 1] == 1
            ):
                next_state_3 = AgentState(x=state.x - 1, y=state.y + 1)
        elif action[agent] == AgentAction.right:
            if (
                state.x < self._num_cols - 1
                and self._maze[state.y][state.x + 1] == 1
            ):
                next_state_1 = AgentState(x=state.x + 1, y=state.y)
            if state.y > 0 and self._maze[state.y - 1][state.x + 1] == 1:
                next_state_2 = AgentState(x=state.x + 1, y=state.y - 1)
            if (
                state.y < self._num_rows - 1
                and self._maze[state.y + 1][state.x + 1] == 1
            ):
                next_state_3 = AgentState(x=state.x + 1, y=state.y + 1)
        elif action[agent] == AgentAction.up:
            if state.y > 0 and self._maze[state.y - 1][state.x] == 1:
                next_state_1 = AgentState(x=state.x, y=state.y - 1)
            if state.x > 0 and self._maze[state.y - 1][state.x - 1] == 1:
                next_state_2 = AgentState(x=state.x - 1, y=state.y - 1)
            if (
                state.x < self._num_cols - 1
                and self._maze[state.y - 1][state.x + 1] == 1
            ):
                next_state_3 = AgentState(x=state.x + 1, y=state.y - 1)
        elif action[agent] == AgentAction.down:
            if (
                state.y < self._num_rows - 1
                and self._maze[state.y + 1][state.x] == 1
            ):
                next_state_1 = AgentState(x=state.x, y=state.y + 1)
            if state.x > 0 and self._maze[state.y + 1][state.x - 1] == 1:
                next_state_2 = AgentState(x=state.x - 1, y=state.y + 1)
            if (
                state.x < self._num_cols - 1
                and self._maze[state.y + 1][state.x + 1] == 1
            ):
                next_state_3 = AgentState(x=state.x + 1, y=state.y + 1)
        # Sample: 80% intended move, 10% for each lateral slip.
        next_state[agent] = rd.choices(
            [next_state_1, next_state_2, next_state_3], [0.8, 0.1, 0.1], k=1
        )[0]
        transition_value[agent] = Value(cost=1)
        if tuple(next_state[agent]) in occupied_cells:
            # Collision: both agents involved hit a dead end and the
            # incoming agent pays a heavy penalty (useful for random walks).
            dead_end[agent] = True
            dead_end[occupied_cells[tuple(next_state[agent])]] = True
            transition_value[agent] = Value(cost=1000)
        else:
            occupied_cells[tuple(next_state[agent])] = agent
    return TransitionOutcome(
        state=HashableDict(next_state),
        value=transition_value
        if not self._flatten_data
        else Value(cost=sum(v.cost for a, v in transition_value.items())),
        termination=dead_end
        if not self._flatten_data
        else all(t for a, t in dead_end.items()),
        info=None,
    )
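# --- Standalone sketch of the slippage model above ---------------------------
# The intended cell is sampled with probability 0.8 and each lateral
# deviation with probability 0.1; blocked deviations default to the current
# cell, so their probability mass collapses onto "no move".
import random
from collections import Counter

random.seed(0)
counts = Counter(
    random.choices(["intended", "slip_a", "slip_b"], [0.8, 0.1, 0.1], k=1)[0]
    for _ in range(10_000)
)
print(counts)  # empirical frequencies approach 0.8 / 0.1 / 0.1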
            lambda etime, nbr, bval, ema: martdp_watchdog(etime, nbr, bval, ema),
        "online_node_garbage": True,
        "continuous_planning": False,
        "debug_logs": False,
    },
    "singleagent_solver_kwargs": {
        "domain_factory": lambda: lambda multiagent_domain, agent: SingleAgentMaze(
            multiagent_domain._maze, multiagent_domain._agents_goals[agent]
        ),
        "heuristic": lambda d, s: Value(
            cost=sqrt((d._goal.x - s.x) ** 2 + (d._goal.y - s.y) ** 2)
        ),
        "use_labels": True,
        "time_budget": 1000,
        "max_depth": 100,
        "continuous_planning": False,
        "online_node_garbage": False,
        "parallel": False,
        "debug_logs": False,
    },
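# --- Standalone check of the heuristic in the kwargs above -------------------
# Euclidean distance never exceeds Manhattan distance, which itself lower
# bounds the number of unit-cost 4-connected moves, so sqrt(dx^2 + dy^2) is
# an admissible (optimistic) cost-to-go for this maze.
from math import sqrt

def euclidean(goal_x: int, goal_y: int, x: int, y: int) -> float:
    return sqrt((goal_x - x) ** 2 + (goal_y - y) ** 2)

assert euclidean(3, 4, 0, 0) == 5.0  # <= Manhattan distance 7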
def _multiagent_heuristic(
    self, observation: D.T_agent[D.T_observation]
) -> Tuple[D.T_agent[Value[D.T_value]], D.T_agent[D.T_concurrency[D.T_event]]]:
    for a, s in self._singleagent_solvers.items():
        if observation[a] not in self._singleagent_solutions[a]:
            undefined_solution = False
            s.solve_from(observation[a])
            if hasattr(self._singleagent_solvers[a], "get_policy"):
                p = self._singleagent_solvers[a].get_policy()
                for ps, pav in p.items():
                    # The policy maps state -> (action, value); solutions
                    # store (value, action), hence the reversal.
                    self._singleagent_solutions[a][ps] = pav[::-1]
                undefined_solution = (
                    observation[a] not in self._singleagent_solutions[a]
                )
            else:
                if not s.is_solution_defined_for(observation[a]):
                    undefined_solution = True
                else:
                    self._singleagent_solutions[a][observation[a]] = (
                        s.get_utility(observation[a]),
                        s.get_next_action(observation[a]),
                    )
            if undefined_solution:
                is_terminal = (
                    hasattr(self._get_singleagent_domain(a), "is_goal")
                    and self._get_singleagent_domain(a).is_goal(observation[a])
                ) or (
                    hasattr(self._get_singleagent_domain(a), "is_terminal")
                    and self._get_singleagent_domain(a).is_terminal(observation[a])
                )
                if not is_terminal:
                    print(
                        "\x1b[3;33;40m"
                        + "/!\\ Solution not defined for agent {} in non-terminal state {}".format(
                            a, observation[a]
                        )
                        + ": assigning default action! (is it a terminal state without a no-op action?)"
                        "\x1b[0m"
                    )
                try:
                    self._singleagent_solutions[a][observation[a]] = (
                        0,
                        self._get_singleagent_domain(a)
                        .get_applicable_actions(observation[a])
                        .sample(),
                    )
                except Exception as err:
                    terminal_str = "terminal " if is_terminal else ""
                    raise RuntimeError(
                        "Cannot sample applicable action "
                        "for agent {} in {}state {} "
                        "(original exception is: {})".format(
                            a, terminal_str, observation[a], err
                        )
                    )
    if issubclass(self._multiagent_solver.T_domain, SingleAgent):
        # Downstream solver is single-agent: sum the per-agent costs into a
        # single Value but keep one suggested action per agent.
        h = (
            Value(
                cost=sum(
                    p[observation[a]][0]
                    for a, p in self._singleagent_solutions.items()
                )
            ),
            {
                a: p[observation[a]][1]
                for a, p in self._singleagent_solutions.items()
            },
        )
    else:
        h = (
            {
                a: Value(cost=p[observation[a]][0])
                for a, p in self._singleagent_solutions.items()
            },
            {
                a: p[observation[a]][1]
                for a, p in self._singleagent_solutions.items()
            },
        )
    return h
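# --- Hedged illustration of the two return shapes above ----------------------
# With plain dicts/tuples standing in for the framework types: when the
# downstream solver is single-agent, per-agent costs are summed into one
# value and actions stay per agent; otherwise both stay per-agent dicts.
solutions = {"a0": (3.0, "up"), "a1": (1.0, "left")}  # agent -> (utility, action)

aggregated = (
    sum(utility for utility, _ in solutions.values()),
    {agent: act for agent, (_, act) in solutions.items()},
)
per_agent = (
    {agent: utility for agent, (utility, _) in solutions.items()},
    {agent: act for agent, (_, act) in solutions.items()},
)
assert aggregated == (4.0, {"a0": "up", "a1": "left"})
assert per_agent[0]["a1"] == 1.0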