) solvers = list(filter(lambda s: s["entry"] is not None, solvers)) # Run loop to ask user input domain = MyDomain() # MyDomain(5,5) with tqdm(total=len(solvers) * 100) as pbar: for s in solvers: solver_type = s["entry"] for i in range(50): s["config"]["shared_memory_proxy"] = None with solver_type(**s["config"]) as solver: MyDomain.solve_with(solver) # ,lambda:MyDomain(5,5)) rollout( domain, solver, max_steps=50, outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}", ) pbar.update(1) for i in range(50): s["config"]["shared_memory_proxy"] = GridShmProxy() with solver_type(**s["config"]) as solver: MyDomain.solve_with(solver) # ,lambda:MyDomain(5,5)) rollout( domain, solver, max_steps=50, outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}", ) pbar.update(1)
choice = int( input('\nChoose a solver:\n{solvers}\n'.format(solvers='\n'.join( ['0. Quit'] + [f'{i + 1}. {s["name"]}' for i, s in enumerate(solvers)])))) if choice == 0: # the user wants to quit break else: selected_solver = solvers[choice - 1] solver_type = selected_solver['entry'] # Test solver solution on domain print('==================== TEST SOLVER ====================') # Check if Random Walk selected or other if solver_type is None: rollout(domain, solver=None, max_steps=1000, max_framerate=30, outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}') else: # Check that the solver is compatible with the domain assert solver_type.check_domain(domain) # Solve with selected solver with solver_type(**selected_solver['config']) as solver: Maze.solve_with(solver) rollout(domain, solver, max_steps=1000, max_framerate=30, outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}')
def domain_factory():
    """Build a fresh GymRIWDomain around a newly created ENV_NAME environment."""
    return GymRIWDomain(
        gym_env=gym.make(ENV_NAME),
        set_state=lambda e, s: e.set_state(s),
        get_state=lambda e: e.get_state(),
        continuous_feature_fidelity=1,
        discretization_factor=9,
        max_depth=HORIZON,
    )


domain = domain_factory()

if RIW.check_domain(domain):

    def solver_factory():
        """Create the GymRIW solver configured for this example."""
        return GymRIW(
            domain_factory=domain_factory,
            state_features=lambda d, s: d.bee1_features(s),
            use_state_feature_hash=False,
            use_simulation_domain=False,
            continuous_planning=True,
            online_node_garbage=True,
            time_budget=10000,
            rollout_budget=30,
            max_depth=100,
            exploration=0.5,
            parallel=False,
            debug_logs=False,
        )

    with solver_factory() as solver:
        GymRIWDomain.solve_with(solver, domain_factory)
        rollout(
            domain,
            solver,
            num_episodes=1,
            max_steps=HORIZON,
            max_framerate=30,
            verbose=True,
            outcome_formatter=lambda o: f'{o.observation} - reward: {o.value.reward:.2f}',
            action_formatter=lambda a: f'{a}',
        )
        # Dump the policy as JSON; states and the first element of each value
        # are stringified before serialization.
        with open('gym_jsbsim_riw.json', 'w') as myfile:
            mydict = solver.get_policy()
            json.dump(
                {str(s): (str(v[0]), v[1]) for s, v in mydict.items()},
                myfile,
            )
outcome = self._domain.step(action) observations = {k: next(iter(self._domain.get_observation_space()[k].to_unwrapped([v]))) for k, v in outcome.observation.items()} rewards = {k: v.reward for k, v in outcome.value.items()} done = {'__all__': outcome.termination} infos = {k: (v or {}) for k, v in outcome.info.items()} return observations, rewards, done, infos def unwrapped(self): """Unwrap the scikit-decide domain and return it. # Returns The original scikit-decide domain. """ return self._domain if __name__ == '__main__': from ray.rllib.agents.ppo import PPOTrainer from skdecide.hub.domain.rock_paper_scissors import RockPaperScissors from skdecide.utils import rollout domain_factory = lambda: RockPaperScissors() domain = domain_factory() if RayRLlib.check_domain(domain): solver_factory = lambda: RayRLlib(PPOTrainer, train_iterations=1) solver = RockPaperScissors.solve_with(solver_factory, domain_factory) rollout(domain, solver, action_formatter=lambda a: str({k: v.name for k, v in a.items()}), outcome_formatter=lambda o: f'{ {k: v.name for k, v in o.observation.items()} }' f' - rewards: { {k: v.reward for k, v in o.value.items()} }')
def domain_factory():
    """Build a fresh GymRIWDomain around a new FakeGymEnv instance."""
    return GymRIWDomain(
        gym_env=FakeGymEnv(),
        set_state=lambda e, s: e.set_state(s),
        get_state=lambda e: e.get_state(),
        continuous_feature_fidelity=3,
        discretization_factor=5,
    )


domain = domain_factory()

if RIW.check_domain(domain):

    def solver_factory():
        """Create the RIW solver configured for this example."""
        return RIW(
            domain_factory=domain_factory,
            state_features=lambda d, s: d.bee1_features(s),
            use_state_feature_hash=False,
            use_simulation_domain=False,
            time_budget=200,
            rollout_budget=1000,
            max_depth=10,
            exploration=0.25,
            parallel=False,
            debug_logs=False,
        )

    with solver_factory() as solver:
        GymRIWDomain.solve_with(solver, domain_factory)
        # The rollout starts from the state obtained by resetting the
        # solver's own domain instance.
        initial_state = solver._domain.reset()
        rollout(
            domain,
            solver,
            from_memory=initial_state,
            num_episodes=1,
            max_steps=HORIZON,
            max_framerate=30,
            outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}',
        )
''' # %% ENV_NAME = 'CartPole-v1' domain_factory = lambda: GymDomain(gym.make(ENV_NAME)) domain = domain_factory() if StableBaseline.check_domain(domain): solver_factory = lambda: StableBaseline( PPO, 'MlpPolicy', learn_config={'total_timesteps': 30000}, verbose=1) with solver_factory() as solver: GymDomain.solve_with(solver, domain_factory) solver.save('TEMP_Baselines') rollout(domain, solver, num_episodes=1, max_steps=1000, max_framerate=30, outcome_formatter=None) # %% ''' Restore saved solution and re-run rollout. ''' # %% with solver_factory() as solver: GymDomain.solve_with(solver, domain_factory, load_path='TEMP_Baselines') rollout(domain, solver, num_episodes=1, max_steps=1000,
# NOTE(review): the two functions below take `self` and read `self._n_positions`
# / `self._n_colours`; their enclosing class header is outside this excerpt, so
# re-indent them to method level when splicing back into the class.
def _list_hidden_solutions(self):
    """Return a list of all possible hidden solutions (n_colours ** n_positions).

    Each solution is a tuple of colour indices; the first position varies
    slowest in the returned list.
    """
    h_solutions = [tuple()]
    # Each pass appends one more position to every partial solution.
    for _ in range(self._n_positions):
        h_solutions = [s + (c,) for s in h_solutions for c in range(self._n_colours)]
    return h_solutions


def _calc_score(self, state, guess):
    """Compute the score of a guess.

    # Parameters
    state: Object exposing the hidden combination as `state.solution`.
    guess: Sequence of colours, indexed position by position against the solution.

    # Returns
    A Score with the number of bulls (right colour, right position) and cows
    (right colour, wrong position).
    """
    solution = state.solution
    bulls = [colour == solution[i] for i, colour in enumerate(guess)]
    cows = [False] * len(guess)
    for i, colour in enumerate(guess):
        if colour == solution[i]:
            continue  # already counted as a bull
        # Pair this colour with the first solution slot that is neither a bull
        # nor already claimed by an earlier cow, so each slot is counted once.
        for j in range(len(guess)):
            if colour == solution[j] and not bulls[j] and not cows[j]:
                cows[j] = True
                break
    return Score(total_bulls=sum(bulls), total_cows=sum(cows))


if __name__ == '__main__':
    from skdecide.utils import rollout

    domain = MasterMind(3, 3)
    rollout(domain, max_steps=1000,
            outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}')
. . . ... ... ....... . . . . . . . ..... . ... . ... . . . . . . . . . ... ... . . . ... . . . . . . . . . . ... ......... . . . . . . . . ..................... """ # Start top-left, try to reach bottom-right of this maze domain = MyDomain(State(1, 1), State(19, 19), maze_str) # Random walk in the maze (may sometimes reach the goal by chance) rollout(domain, max_steps=100, render=False) # %% """ Pick a solver (lazy A*) and solve the maze optimally. """ # %% # Check solver compatibility with the domain assert LazyAstar.check_domain(domain) # Compute solution and visualize it with LazyAstar() as solver: MyDomain.solve_with(solver, lambda: MyDomain(State(1, 1), State(19, 19), maze_str)) rollout(domain, solver, max_steps=100, max_framerate=10, verbose=False)
n_epochs = 500 epoch_size = 200 directions = 10 top_directions = 3 learning_rate = 0.02 policy_noise = 0.03 reward_maximization = True else: n_epochs = 300 epoch_size = 200 directions = 25 top_directions = 3 learning_rate = 1 policy_noise = 1 reward_maximization = True selected_domain = domains[domain_choice - 1] domain_type = selected_domain['entry'] domain = domain_type(**selected_domain['config']) solver_factory = lambda: ars.AugmentedRandomSearch(n_epochs=n_epochs, epoch_size=epoch_size, directions=directions, top_directions=top_directions, learning_rate=learning_rate, policy_noise=policy_noise, reward_maximization=reward_maximization) with solver_factory() as solver: GymDomain.solve_with(solver, lambda: domain_type(**selected_domain['config'])) # Test solver solution on domain print('==================== TEST SOLVER ====================') print(domain.get_observation_space().unwrapped(), '===>', domain.get_action_space().unwrapped()) rollout(domain, solver, **selected_domain['rollout']) if hasattr(domain, 'close'): domain.close()
def get_probability(distribution, element, n=100):
    """Utility function to get the probability of a specific element from a scikit-decide distribution
    (based on sampling if this distribution is not a DiscreteDistribution).

    # Parameters
    distribution: Object exposing a `sample()` method.
    element: The value whose frequency among the samples is measured.
    n: Number of samples to draw (the estimate is `hits / n`).

    # Returns
    The fraction of the `n` samples that compared equal to `element`.
    """
    # TODO: uncomment lines below once debugged
    # # Avoid "dumb" sampling if the distribution is a DiscreteDistribution:
    # if isinstance(distribution, DiscreteDistribution):
    #     return next(p for e, p in distribution.get_values() if e == element)
    # else:
    hits = sum(1 for _ in range(n) if distribution.sample() == element)
    return hits / n


if __name__ == '__main__':
    from skdecide.hub.domain.mastermind import MasterMind
    from skdecide.utils import rollout

    domain_factory = lambda: MasterMind(3, 3)
    domain = domain_factory()
    if POMCP.check_domain(domain):
        solver = MasterMind.solve_with(POMCP, domain_factory)
        rollout(domain, solver, num_episodes=5, max_steps=1000,
                outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}')
input("\nChoose a solver:\n{solvers}\n".format(solvers="\n".join( ["0. Quit"] + [f'{i + 1}. {s["name"]}' for i, s in enumerate(solvers)])))) if choice == 0: # the user wants to quit break else: selected_solver = solvers[choice - 1] solver_type = selected_solver["entry"] # Test solver solution on domain print("==================== TEST SOLVER ====================") # Check if Random Walk selected or other if solver_type is None: rollout( domain, solver=None, max_steps=1000, outcome_formatter=lambda o: f"{o.observation} - cost: {sum(o.value[a].cost for a in o.observation):.2f}", ) else: # Check that the solver is compatible with the domain assert solver_type.check_domain(domain) # Solve with selected solver with solver_type(**selected_solver["config"]) as solver: MultiAgentMaze.solve_with(solver) rollout( domain, solver, max_steps=1000, max_framerate=5, outcome_formatter=lambda o:
policy_noise = 1 reward_maximization = True selected_domain = domains[domain_choice - 1] domain_type = selected_domain["entry"] domain = domain_type(**selected_domain["config"]) solver_factory = lambda: ars.AugmentedRandomSearch( n_epochs=n_epochs, epoch_size=epoch_size, directions=directions, top_directions=top_directions, learning_rate=learning_rate, policy_noise=policy_noise, reward_maximization=reward_maximization, ) with solver_factory() as solver: GymDomain.solve_with( solver, lambda: domain_type(**selected_domain["config"]) ) # Test solver solution on domain print("==================== TEST SOLVER ====================") print( domain.get_observation_space().unwrapped(), "===>", domain.get_action_space().unwrapped(), ) rollout(domain, solver, **selected_domain["rollout"]) if hasattr(domain, "close"): domain.close()
# NOTE(review): the three functions below take `self`; their enclosing class
# header is outside this excerpt, so re-indent them to method level when
# splicing back into the class.
def _state_reset(self) -> D.T_state:
    """Return the initial state, whose move counter is zero."""
    initial_state = State(num_move=0)
    return initial_state


def _get_observation(
    self,
    state: D.T_state,
    action: Optional[D.T_agent[D.T_concurrency[D.T_event]]] = None,
) -> D.T_agent[D.T_observation]:
    """Give each player the other player's last move.

    When no action is supplied, both players observe Move.rock by default.
    """
    if action is None:
        return {"player1": Move.rock, "player2": Move.rock}
    # Observations are crossed: player1 sees player2's move and vice versa.
    return {"player1": action["player2"], "player2": action["player1"]}


def _get_observation_space_(self) -> D.T_agent[Space[D.T_observation]]:
    """Return one EnumSpace over Move per player."""
    return {agent: EnumSpace(Move) for agent in ("player1", "player2")}


if __name__ == "__main__":
    from skdecide.utils import rollout

    domain = RockPaperScissors()
    rollout(
        domain,
        action_formatter=lambda a: str({k: v.name for k, v in a.items()}),
        outcome_formatter=lambda o: f"{ {k: v.name for k, v in o.observation.items()} }"
        f" - rewards: { {k: v.reward for k, v in o.value.items()} }",
    )
set_state=lambda e, s: e.set_state(s), get_state=lambda e: e.get_state(), continuous_feature_fidelity=3, discretization_factor=5, max_depth=50, ) if IW.check_domain(domain_factory()): solver_factory = lambda: GymIW( domain_factory=domain_factory, state_features=lambda d, s: d.bee1_features(s), use_state_feature_hash=False, # node_ordering=lambda a_gscore, a_novelty, a_depth, b_gscore, b_novelty, b_depth: True if a_novelty > b_novelty else False if a_novelty < b_novelty else a_gscore < b_gscore, # node_ordering=lambda a_gscore, a_novelty, a_depth, b_gscore, b_novelty, b_depth: True if a_gscore < b_gscore else False if a_gscore > b_gscore else a_novelty > b_novelty, parallel=False, debug_logs=False, ) with solver_factory() as solver: GymIWDomain.solve_with(solver, domain_factory) evaluation_domain = EvaluationDomain(solver._domain) evaluation_domain.reset() rollout( evaluation_domain, solver, num_episodes=1, max_steps=HORIZON, max_framerate=30, outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}", )
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Example 1: Run a Gym environment"""

# %%
'''
Import modules.
'''

# %%
import gym

from skdecide.hub.domain.gym import GymDomain
from skdecide.utils import rollout

# %%
'''
Select a [Gym environment](https://gym.openai.com/envs) and run 5 episodes.
'''

# %%
ENV_NAME = 'CartPole-v1'  # or any other installed environment ('MsPacman-v4'...)

gym_domain = GymDomain(gym.make(ENV_NAME))
try:
    rollout(gym_domain, num_episodes=5, max_steps=1000, max_framerate=30, outcome_formatter=None)
finally:
    # Close even if the rollout raises; optional but recommended to avoid
    # Gym errors at the end.
    gym_domain.close()
from skdecide.hub.solver.maxent_irl import (  # maximum entropy inverse reinforcement learning
    MaxentIRL,
)
from skdecide.utils import rollout

ENV_NAME = "MountainCar-v0"

domain_factory = lambda: GymDomain(gym.make(ENV_NAME))
domain = domain_factory()
try:
    print("===>", domain.get_action_space().unwrapped())

    if MaxentIRL.check_domain(domain):
        solver_factory = lambda: MaxentIRL(
            n_states=400,
            n_actions=3,
            one_feature=20,
            expert_trajectories="expert_mountain.npy",
            n_epochs=10000,
        )
        with solver_factory() as solver:
            GymDomain.solve_with(solver, domain_factory)
            rollout(
                domain,
                solver,
                num_episodes=5,
                max_steps=500,
                max_framerate=30,
                outcome_formatter=None,
                action_formatter=None,
            )
finally:
    # Close the evaluation domain on every path, including when the solver
    # is incompatible or solving / rollout raises.
    domain.close()
setattr(domain_type, 'state_features', lambda self, s: [s.x, s.y]) elif selected_domain['name'] == 'Maze': setattr(domain_type, 'state_features', lambda self, s: [s.x, s.y]) elif selected_domain['entry'].__name__ == 'GymDomain': setattr(domain_type, 'state_features', lambda self, s: self.bee1_features(s)) else: setattr(domain_type, 'state_features', lambda self, s: s) # Test solver solution on domain print('==================== TEST SOLVER ====================') # Check if Random Walk selected or other if solver_type is None: rollout(domain, solver=None, **selected_domain['rollout']) else: # Solve with selected solver actual_domain_type = domain_type actual_domain = domain if selected_solver['need_domain_factory']: if selected_domain['entry'].__name__ == 'GymDomain' and \ (selected_solver['entry'].__name__ == 'IW' or selected_solver['entry'].__name__ == 'BFWS'): actual_domain_type = GymDomainForWidthSolvers actual_domain = actual_domain_type(**selected_domain['config']) selected_solver['config']['node_ordering'] = lambda a_gscore, a_novelty, a_depth, b_gscore, b_novelty, b_depth: a_novelty > b_novelty selected_solver['config']['domain_factory'] = lambda: actual_domain_type(**selected_domain['config']) with solver_type(**selected_solver['config']) as solver: actual_domain_type.solve_with(solver, lambda: actual_domain_type(**selected_domain['config'])) rollout(actual_domain, solver, **selected_domain['rollout']) if hasattr(domain, 'close'):