def __init__(
    self,
    domain_factory: Callable[[], Domain],
    state_features: Callable[[Domain, D.T_state], Any],
    heuristic: Callable[[Domain, D.T_state], float],
    termination_checker: Callable[[Domain, D.T_state], bool],
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
) -> None:
    """Construct the solver wrapper and register the callables shared with
    (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    state_features: Maps (domain, state) to the feature representation used by
        the underlying solver.
    heuristic: Maps (domain, state) to a float estimate; when None, a constant
        zero heuristic is substituted.
    termination_checker: Maps (domain, state) to True when the state is terminal.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._domain = None
    self._state_features = state_features
    self._termination_checker = termination_checker
    self._debug_logs = debug_logs
    # Substitute a constant-zero heuristic when the caller provides none.
    self._heuristic = heuristic if heuristic is not None else (lambda d, s: 0)
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [
        self._state_features,
        self._heuristic,
        self._termination_checker,
    ]
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Callable[[], Domain],
    heuristic: Optional[
        Callable[[Domain, D.T_state], D.T_agent[Value[D.T_value]]]
    ] = None,
    discount: float = 1.0,
    # NOTE(review): "expanions" is a typo for "expansions"; the keyword name is
    # part of the public interface, so it is kept for backward compatibility.
    max_tip_expanions: int = 1,
    parallel: bool = False,
    shared_memory_proxy=None,
    detect_cycles: bool = False,
    debug_logs: bool = False,
) -> None:
    """Construct the solver wrapper and register the heuristic shared with
    (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    heuristic: Maps (domain, state) to a Value estimate; when None, a constant
        zero-cost heuristic is substituted.
    discount: Discount factor applied by the underlying solver.
    max_tip_expanions: Maximum number of tip-node expansions (name kept despite
        the typo, see NOTE above).
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    detect_cycles: Whether the underlying solver should detect cycles.
    debug_logs: Whether to activate verbose solver logging.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._discount = discount
    # Stored under the correctly spelled attribute name.
    self._max_tip_expansions = max_tip_expanions
    self._detect_cycles = detect_cycles
    self._debug_logs = debug_logs
    # Substitute a zero-cost heuristic when the caller provides none.
    self._heuristic = (
        heuristic if heuristic is not None else (lambda d, s: Value(cost=0))
    )
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._heuristic]
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Callable[[], Domain] = None,
    heuristic: Optional[Callable[[Domain, D.T_state], float]] = None,
    use_labels: bool = True,
    time_budget: int = 3600000,
    rollout_budget: int = 100000,
    max_depth: int = 1000,
    discount: float = 1.0,
    epsilon: float = 0.001,
    online_node_garbage: bool = False,
    continuous_planning: bool = True,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
) -> None:
    """Construct the solver wrapper and register the heuristic shared with
    (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    heuristic: Maps (domain, state) to a float estimate; when None, a constant
        zero heuristic is substituted.
    use_labels: Whether the underlying solver uses state labels.
    time_budget: Search time budget (presumably milliseconds — TODO confirm).
    rollout_budget: Maximum number of rollouts.
    max_depth: Maximum rollout depth.
    discount: Discount factor applied by the underlying solver.
    epsilon: Convergence threshold used by the underlying solver.
    online_node_garbage: Whether to garbage-collect search nodes online.
    continuous_planning: Whether to keep planning between action executions.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    # Substitute a constant-zero heuristic when the caller provides none.
    self._heuristic = heuristic if heuristic is not None else (lambda d, s: 0)
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._heuristic]
    self._use_labels = use_labels
    self._time_budget = time_budget
    self._rollout_budget = rollout_budget
    self._max_depth = max_depth
    self._discount = discount
    self._epsilon = epsilon
    self._online_node_garbage = online_node_garbage
    self._continuous_planning = continuous_planning
    self._debug_logs = debug_logs
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Callable[[], Domain],
    state_features: Callable[[Domain, D.T_state], Any],
    use_state_feature_hash: bool = False,
    use_simulation_domain: bool = False,
    time_budget: int = 3600000,
    rollout_budget: int = 100000,
    max_depth: int = 1000,
    exploration: float = 0.25,
    discount: float = 1.0,
    online_node_garbage: bool = False,
    continuous_planning: bool = True,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
) -> None:
    """Construct the solver wrapper and register the state-feature callable
    shared with (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    state_features: Maps (domain, state) to the feature representation used by
        the underlying solver.
    use_state_feature_hash: Whether to hash states via their features.
    use_simulation_domain: Whether the underlying solver uses a simulation domain.
    time_budget: Search time budget (presumably milliseconds — TODO confirm).
    rollout_budget: Maximum number of rollouts.
    max_depth: Maximum rollout depth.
    exploration: Exploration parameter of the underlying solver.
    discount: Discount factor applied by the underlying solver.
    online_node_garbage: Whether to garbage-collect search nodes online.
    continuous_planning: Whether to keep planning between action executions.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._domain = None
    self._state_features = state_features
    self._use_state_feature_hash = use_state_feature_hash
    self._use_simulation_domain = use_simulation_domain
    self._time_budget = time_budget
    self._rollout_budget = rollout_budget
    self._max_depth = max_depth
    self._exploration = exploration
    self._discount = discount
    self._online_node_garbage = online_node_garbage
    self._continuous_planning = continuous_planning
    self._debug_logs = debug_logs
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._state_features]
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Callable[[], Domain],
    state_features: Callable[[Domain, D.T_state], Any],
    use_state_feature_hash: bool = False,
    node_ordering: Callable[[float, int, int, float, int, int], bool] = None,
    # Time budget to continue searching for better plans after a goal has
    # been reached; 0 means stop at the first goal.
    time_budget: int = 0,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
) -> None:
    """Construct the solver wrapper and register the state-feature callable
    shared with (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    state_features: Maps (domain, state) to the feature representation used by
        the underlying solver.
    use_state_feature_hash: Whether to hash states via their features.
    node_ordering: Optional comparison callable used by the underlying solver
        to order search nodes.
    time_budget: Time budget to keep searching for better plans after a goal
        has been reached.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._domain = None
    self._state_features = state_features
    self._use_state_feature_hash = use_state_feature_hash
    self._node_ordering = node_ordering
    self._time_budget = time_budget
    self._debug_logs = debug_logs
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._state_features]
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Callable[[], Domain],
    heuristic: Optional[Callable[[Domain, D.T_state], float]] = None,
    discount: float = 1.0,
    epsilon: float = 0.001,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
) -> None:
    """Construct the solver wrapper and register the heuristic shared with
    (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    heuristic: Maps (domain, state) to a float estimate; when None, a constant
        zero heuristic is substituted.
    discount: Discount factor applied by the underlying solver.
    epsilon: Convergence threshold used by the underlying solver.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._discount = discount
    self._epsilon = epsilon
    self._debug_logs = debug_logs
    # Substitute a constant-zero heuristic when the caller provides none.
    self._heuristic = heuristic if heuristic is not None else (lambda d, s: 0)
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._heuristic]
    self._ipc_notify = True
def close(self):
    """Joins the parallel domains' processes.

    Not calling this method (or not using the 'with' context statement)
    results in the solver forever waiting for the domain processes to exit.
    """
    # Close the underlying solver first, but only in parallel mode, so it
    # releases its hold on the domain before the worker processes are joined.
    if self._parallel:
        self._solver.close()
    # Join the domain worker processes managed by the ParallelSolver base.
    ParallelSolver.close(self)
def __init__(
    self,
    domain_factory: Callable[[], Domain],
    time_budget: int = 3600000,
    rollout_budget: int = 100000,
    max_depth: int = 1000,
    discount: float = 1.0,
    uct_mode: bool = True,
    ucb_constant: float = 1.0 / sqrt(2.0),
    online_node_garbage: bool = False,
    custom_policy: Callable[
        [Domain, D.T_agent[D.T_observation]],
        D.T_agent[D.T_concurrency[D.T_event]],
    ] = None,
    heuristic: Callable[
        [Domain, D.T_agent[D.T_observation]], Tuple[float, int]
    ] = None,
    transition_mode: Options.TransitionMode = Options.TransitionMode.Distribution,
    tree_policy: Options.TreePolicy = Options.TreePolicy.Default,
    expander: Options.Expander = Options.Expander.Full,
    action_selector_optimization: Options.ActionSelector = Options.ActionSelector.UCB1,
    action_selector_execution: Options.ActionSelector = Options.ActionSelector.BestQValue,
    rollout_policy: Options.RolloutPolicy = Options.RolloutPolicy.Random,
    back_propagator: Options.BackPropagator = Options.BackPropagator.Graph,
    continuous_planning: bool = True,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
) -> None:
    """Construct the solver wrapper, recording search budgets, tree-search
    options and the callables shared with (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    time_budget: Search time budget (presumably milliseconds — TODO confirm).
    rollout_budget: Maximum number of rollouts.
    max_depth: Maximum rollout depth.
    discount: Discount factor applied by the underlying solver.
    uct_mode: Whether to run the underlying solver in UCT mode.
    ucb_constant: UCB exploration constant (defaults to 1/sqrt(2)).
    online_node_garbage: Whether to garbage-collect search nodes online.
    custom_policy: Optional policy callable used by the underlying solver.
    heuristic: Optional callable returning a (value, count) pair estimate.
    transition_mode: Transition-sampling mode option.
    tree_policy: Tree-policy option.
    expander: Node-expansion option.
    action_selector_optimization: Action selector used during optimization.
    action_selector_execution: Action selector used during execution.
    rollout_policy: Rollout-policy option.
    back_propagator: Back-propagation option.
    continuous_planning: Whether to keep planning between action executions.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._domain = None
    # Search budgets and numeric parameters.
    self._time_budget = time_budget
    self._rollout_budget = rollout_budget
    self._max_depth = max_depth
    self._discount = discount
    self._uct_mode = uct_mode
    self._ucb_constant = ucb_constant
    self._online_node_garbage = online_node_garbage
    # User-supplied callables (may be None; stored as-is, matching the
    # original behavior — they are forwarded even when None).
    self._custom_policy = custom_policy
    self._heuristic = heuristic
    # Tree-search option enums.
    self._transition_mode = transition_mode
    self._tree_policy = tree_policy
    self._expander = expander
    self._action_selector_optimization = action_selector_optimization
    self._action_selector_execution = action_selector_execution
    self._rollout_policy = rollout_policy
    self._back_propagator = back_propagator
    self._continuous_planning = continuous_planning
    self._debug_logs = debug_logs
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._custom_policy, self._heuristic]
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Callable[[], Domain],
    state_features: Callable[[Domain, D.T_state], Any],
    use_state_feature_hash: bool = False,
    use_simulation_domain: bool = False,
    time_budget: int = 3600000,
    rollout_budget: int = 100000,
    max_depth: int = 1000,
    exploration: float = 0.25,
    epsilon_moving_average_window: int = 100,
    epsilon: float = 0.001,
    discount: float = 1.0,
    online_node_garbage: bool = False,
    continuous_planning: bool = True,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
    watchdog: Callable[[int, int, float, float], bool] = None,
) -> None:
    """Construct the solver wrapper and register the state-feature callable
    shared with (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    state_features: Maps (domain, state) to the feature representation used by
        the underlying solver.
    use_state_feature_hash: Whether to hash states via their features.
    use_simulation_domain: Whether the underlying solver uses a simulation domain.
    time_budget: Search time budget (presumably milliseconds — TODO confirm).
    rollout_budget: Maximum number of rollouts.
    max_depth: Maximum rollout depth.
    exploration: Exploration parameter of the underlying solver.
    epsilon_moving_average_window: Window size of the epsilon moving average.
    epsilon: Convergence threshold used by the underlying solver.
    discount: Discount factor applied by the underlying solver.
    online_node_garbage: Whether to garbage-collect search nodes online.
    continuous_planning: Whether to keep planning between action executions.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    watchdog: Callable (elapsed_time, number_rollouts, best_value,
        epsilon_moving_average) -> bool deciding whether to keep searching;
        when None, an always-True watchdog is substituted.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._domain = None
    self._state_features = state_features
    self._use_state_feature_hash = use_state_feature_hash
    self._use_simulation_domain = use_simulation_domain
    self._time_budget = time_budget
    self._rollout_budget = rollout_budget
    self._max_depth = max_depth
    self._exploration = exploration
    self._epsilon_moving_average_window = epsilon_moving_average_window
    self._epsilon = epsilon
    self._discount = discount
    self._online_node_garbage = online_node_garbage
    self._continuous_planning = continuous_planning
    self._debug_logs = debug_logs
    # Substitute a never-stopping watchdog when the caller provides none.
    self._watchdog = (
        watchdog
        if watchdog is not None
        else (
            lambda elapsed_time, number_rollouts, best_value, epsilon_moving_average: True
        )
    )
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._state_features]
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Callable[[], Domain] = None,
    heuristic: Optional[
        Callable[[Domain, D.T_state], D.T_agent[Value[D.T_value]]]
    ] = None,
    use_labels: bool = True,
    time_budget: int = 3600000,
    rollout_budget: int = 100000,
    max_depth: int = 1000,
    epsilon_moving_average_window: int = 100,
    epsilon: float = 0.001,
    discount: float = 1.0,
    online_node_garbage: bool = False,
    continuous_planning: bool = True,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
    watchdog: Callable[[int, int, float, float], bool] = None,
) -> None:
    """Construct the solver wrapper and register the heuristic shared with
    (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    heuristic: Maps (domain, state) to a Value estimate; when None, a constant
        zero-cost heuristic is substituted.
    use_labels: Whether the underlying solver uses state labels.
    time_budget: Search time budget (presumably milliseconds — TODO confirm).
    rollout_budget: Maximum number of rollouts.
    max_depth: Maximum rollout depth.
    epsilon_moving_average_window: Window size of the epsilon moving average.
    epsilon: Convergence threshold used by the underlying solver.
    discount: Discount factor applied by the underlying solver.
    online_node_garbage: Whether to garbage-collect search nodes online.
    continuous_planning: Whether to keep planning between action executions.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    watchdog: Callable (elapsed_time, number_rollouts, best_value,
        epsilon_moving_average) -> bool deciding whether to keep searching;
        when None, an always-True watchdog is substituted.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    # Substitute a zero-cost heuristic when the caller provides none.
    self._heuristic = (
        heuristic if heuristic is not None else (lambda d, s: Value(cost=0))
    )
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._heuristic]
    self._use_labels = use_labels
    self._time_budget = time_budget
    self._rollout_budget = rollout_budget
    self._max_depth = max_depth
    self._epsilon_moving_average_window = epsilon_moving_average_window
    self._epsilon = epsilon
    self._discount = discount
    self._online_node_garbage = online_node_garbage
    self._continuous_planning = continuous_planning
    self._debug_logs = debug_logs
    # Substitute a never-stopping watchdog when the caller provides none.
    self._watchdog = (
        watchdog
        if watchdog is not None
        else (
            lambda elapsed_time, number_rollouts, best_value, epsilon_moving_average: True
        )
    )
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Callable[[], Domain] = None,
    heuristic: Optional[
        Callable[[Domain, D.T_state], D.T_agent[Value[D.T_value]]]
    ] = None,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
) -> None:
    """Construct the solver wrapper and register the heuristic shared with
    (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    heuristic: Maps (domain, state) to a Value estimate; when None, a constant
        zero-cost heuristic is substituted.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    self._debug_logs = debug_logs
    # Substitute a zero-cost heuristic when the caller provides none.
    self._heuristic = (
        heuristic if heuristic is not None else (lambda d, s: Value(cost=0))
    )
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._heuristic]
    self._ipc_notify = True
def __init__(
    self,
    domain_factory: Callable[[], Domain] = None,
    heuristic: Optional[
        Callable[
            [Domain, D.T_state],
            Tuple[
                D.T_agent[Value[D.T_value]],
                D.T_agent[D.T_concurrency[D.T_event]],
            ],
        ]
    ] = None,
    time_budget: int = 3600000,
    rollout_budget: int = 100000,
    max_depth: int = 1000,
    # 0 means the solver will then choose nb_agents as the trial count.
    max_feasibility_trials: int = 0,
    graph_expansion_rate: float = 0.1,
    epsilon_moving_average_window: int = 100,
    # 0.0 by default so epsilon is not a stopping criterion.
    epsilon: float = 0.0,
    discount: float = 1.0,
    action_choice_noise: float = 0.1,
    dead_end_cost: float = 10000,
    online_node_garbage: bool = False,
    continuous_planning: bool = True,
    parallel: bool = False,
    shared_memory_proxy=None,
    debug_logs: bool = False,
    watchdog: Callable[[int, int, float, float], bool] = None,
) -> None:
    """Construct the multi-agent solver wrapper and register the heuristic
    shared with (possibly parallel) domain processes.

    # Parameters
    domain_factory: Zero-argument callable returning a fresh domain instance.
    heuristic: Maps (domain, state) to a pair of per-agent dicts
        (value estimates, suggested actions); when None, a default returning
        zero cost and no action for every agent in the state is substituted.
    time_budget: Search time budget (presumably milliseconds — TODO confirm).
    rollout_budget: Maximum number of rollouts.
    max_depth: Maximum rollout depth.
    max_feasibility_trials: Number of feasibility trials (0 lets the solver
        pick the number of agents).
    graph_expansion_rate: Graph-expansion rate of the underlying solver.
    epsilon_moving_average_window: Window size of the epsilon moving average.
    epsilon: Convergence threshold (0.0 disables it as a stopping criterion).
    discount: Discount factor applied by the underlying solver.
    action_choice_noise: Noise applied to action choice.
    dead_end_cost: Cost assigned to dead-end states.
    online_node_garbage: Whether to garbage-collect search nodes online.
    continuous_planning: Whether to keep planning between action executions.
    parallel: Whether to solve with parallel domain processes.
    shared_memory_proxy: Optional shared-memory proxy forwarded to ParallelSolver.
    debug_logs: Whether to activate verbose solver logging.
    watchdog: Callable (elapsed_time, number_rollouts, best_value,
        epsilon_moving_average) -> bool deciding whether to keep searching;
        when None, an always-True watchdog is substituted.
    """
    ParallelSolver.__init__(
        self,
        domain_factory=domain_factory,
        parallel=parallel,
        shared_memory_proxy=shared_memory_proxy,
    )
    self._solver = None
    # Default heuristic: zero cost and no suggested action for each agent
    # found by iterating the (multi-agent) state.
    self._heuristic = (
        heuristic
        if heuristic is not None
        else (
            lambda d, s: (
                {a: Value(cost=0) for a in s},
                {a: None for a in s},
            )
        )
    )
    # Callables that must be shipped to the parallel domain processes.
    self._lambdas = [self._heuristic]
    self._time_budget = time_budget
    self._rollout_budget = rollout_budget
    self._max_depth = max_depth
    self._max_feasibility_trials = max_feasibility_trials
    self._graph_expansion_rate = graph_expansion_rate
    self._epsilon_moving_average_window = epsilon_moving_average_window
    self._epsilon = epsilon
    self._discount = discount
    self._action_choice_noise = action_choice_noise
    self._dead_end_cost = dead_end_cost
    self._online_node_garbage = online_node_garbage
    self._continuous_planning = continuous_planning
    self._debug_logs = debug_logs
    # Substitute a never-stopping watchdog when the caller provides none.
    self._watchdog = (
        watchdog
        if watchdog is not None
        else (
            lambda elapsed_time, number_rollouts, best_value, epsilon_moving_average: True
        )
    )
    self._ipc_notify = True