def play_game(config: MuZeroConfig, network: Network) -> Game:
    """Self-play one complete game, choosing each move with MCTS.

    Args:
        config: MuZero configuration (move budget, temperature schedule,
            exploration epsilon, prediction_steps).
        network: network used for initial/recurrent inference.

    Returns:
        The finished Game with its history and search statistics recorded.
    """
    game = config.new_game()
    while not game.terminal() and len(game.history) < config.max_moves:
        # Create a new starting point for MCTS.
        root = Node(0)
        current_observation = game.make_image(-1)
        root.expand_node(game.to_play(), game.legal_actions(),
                         network.initial_inference(current_observation))
        root.add_exploration_noise()

        # Carry out the MCTS search.
        run_mcts(config, root, game.action_history(), network)
        T = config.visit_softmax_temperature(
            num_moves=len(game.history),
            training_steps=network.training_steps())

        # First action from the MCTS with some extra exploration.
        action, c1 = root.select_action_with_temperature(T, epsilon=config.epsilon)
        game.apply(action)
        game.store_search_statistics(root)

        # Continue using actions as predicted by MCTS
        # (minimise exploration for these).
        # BUG FIX: this was an `if`, which runs the body at most once and
        # leaves the `ct += 1` counter pointless; the loop bound
        # `ct < config.prediction_steps` only makes sense with a `while`.
        ct = 1
        while not game.terminal() and ct < config.prediction_steps:
            # Descend along the already-searched subtree rooted at c1.
            action, c1 = c1.select_action_with_temperature(1)
            game.apply(action)
            game.store_search_statistics(c1)
            ct += 1
    return game
def play_game(self) -> Game:
    """Run one self-play episode and return the completed Game."""
    game = Game(self.config.discount)
    min_max_stats = MinMaxStats(self.config.known_bounds)

    # Exponential decay of the sampling temperature over training steps,
    # floored at the configured minimum.
    decay = (1 - self.config.temperature_decay_factor) ** self.network.training_steps()
    temperature = max(self.temperature * decay, self.config.temperature_min)
    self.metrics_temperature(temperature)

    while not game.terminal() and len(game.history) < self.config.max_moves:
        # At the root of the search tree we use the representation function
        # to obtain a hidden state given the current observation.
        root = Node(0)
        observation = game.get_observation_from_index(-1)
        output = self.network.initial_inference(observation)
        expand_node(root, game.to_play(), game.legal_actions(), output)
        backpropagate([root], output.value, game.to_play(),
                      self.config.discount, min_max_stats)
        add_exploration_noise(self.config, root)

        # Run a Monte Carlo Tree Search using only action sequences and the
        # model learned by the network.
        run_mcts(self.config, root, game.action_history(),
                 self.network, min_max_stats)

        action = select_action(root, temperature)
        game.apply(action)
        game.store_search_statistics(root)
    return game
def _step_callback(self):
    '''Callback function for button that steps through the game world'''
    # NOTE(review): `self.game.terminal` is read as an attribute here, while
    # sibling code in this project calls `terminal()`. If it is a bound
    # method, this condition is always truthy — confirm it is a
    # property/flag on this Game implementation.
    if not self.game.terminal:
        # Build a fresh MCTS root from the latest observation.
        root = Node(0)
        current_observation = self.game.make_image(-1, self.network.device)
        expand_node(root, self.game.to_play(), self.game.legal_actions(),
                    self.network.initial_inference(current_observation))
        add_exploration_noise(self.config, root)
        # We then run a Monte Carlo Tree Search using only action sequences and the
        # model learned by the network.
        run_mcts(self.config, root, self.game.action_history(), self.network)
        #action = select_action(self.config, len(self.game.history), root)
        # NOTE(review): the move count is hard-coded to 9 instead of the
        # commented-out len(self.game.history) — looks like a leftover debug
        # value; confirm whether it is intentional.
        action = select_action(self.config, 9, root)
        self.game.apply(action)
        self.game.store_search_statistics(root)
        # Refresh the UI after applying the chosen move.
        self.draw_area.draw()
def play_game(config: Config, network: Network):
    """Self-play a single game, picking every move via MCTS."""
    game = Game()
    while True:
        # Stop when the game ends or the move budget is exhausted.
        if game.terminal() or len(game.history) >= config.max_moves:
            break
        action, root = run_mcts(config, game, network)
        game.apply(action)
        game.store_search_statistics(root)
    return game
def play_game(config, network, train):
    """Play one full self-play episode.

    Each game is produced by starting at the initial board position, then
    repeatedly executing a Monte Carlo Tree Search to generate moves until
    the end of the game is reached or the move budget is exhausted.

    Args:
        config: run configuration (max_moves, action_space, ...).
        network: MuZero network used for inference.
        train: when True, add exploration noise at the search root.

    Returns:
        The populated GameHistory (observations, actions, rewards, stats).
    """
    game = config.new_game()
    game_history = GameHistory()
    observation = game.reset()
    # Seed the history with the initial observation (dummy action 0, reward 0).
    game_history.apply(0, observation, 0)

    while not game.terminal() and len(game_history.action_history) < config.max_moves:
        # At the root of the search tree we use the representation function to
        # obtain a hidden state given the current observation.
        root = Node(0)
        # BUG FIX: previously game_history.make_image(-1) was computed and
        # immediately overwritten — the raw latest observation is what feeds
        # the network, so the dead assignment is removed.
        current_observation = torch.tensor(observation).float().unsqueeze(0)
        expand_node(config, root, game.to_play(), game.legal_actions(),
                    network.initial_inference(current_observation))
        if train:
            add_exploration_noise(config, root)

        # We then run a Monte Carlo Tree Search using only action sequences
        # and the model learned by the networks.
        run_mcts(config, root, game, network)

        action = select_action(config, len(game_history.action_history), root, train)
        observation, reward = game.step(action)
        game_history.store_search_statistics(root, config.action_space)
        game_history.apply(action, observation, reward)

    game.close()
    return game_history
def play_game(config: MuZeroConfig, network: Network) -> Game:
    """Generate one self-play game, selecting every move with MCTS."""
    game = Game.from_config(config)
    while not game.terminal() and len(game.history) < config.max_moves:
        # Seed the search tree: run the representation function on the
        # latest observation to obtain the root's hidden state.
        root = Node(0)
        obs = game.make_image(-1)
        inference = network.initial_inference(obs).numpy()
        root.expand(game.to_play(), game.legal_actions(), inference)
        root.add_exploration_noise(config)
        # logging.debug('Running MCTS on step {}.'.format(len(game.history)))
        # Search over action sequences with the model learned by the
        # network, then commit the selected move to the game.
        run_mcts(config, root, game.action_history(), network)
        chosen = root.select_action(config, len(game.history), network)
        game.apply(chosen)
        game.store_search_statistics(root)
    logging.info('Finished episode at step {} | cumulative reward: {}' \
        .format(len(game.obs_history), sum(game.rewards)))
    return game
import argparse
import sys

# BUG FIX: board, ida and rbfs were used below without being imported,
# which raised NameError at runtime.
import board
import ida
import mcts
import rbfs


def main():
    """Parse the map argument and run each solver on a fresh board."""
    parser = argparse.ArgumentParser(description='Solve Sobokan puzzles')
    # BUG FIX: the help text named trivial_board.txt while the actual
    # default is boards/test_board1.txt.
    parser.add_argument("-m", "--map", dest="map_choice",
                        default="boards/test_board1.txt",
                        help="Choose a map file (default: boards/test_board1.txt)")
    args = parser.parse_args()

    # Each solver mutates its board, so regenerate a fresh one per run.
    print("Running IDA* with Manhattan Distance heuristic")
    ida.ida_run(board.gen_default_board(args.map_choice), "MD")

    print("Running IDA* with Hungarian heuristic")
    ida.ida_run(board.gen_default_board(args.map_choice), "HU")

    print("Running RBFS with Manhattan Distance heurisitc")
    rbfs.rbfs_run(board.gen_default_board(args.map_choice), "MD")

    print("Running RBFS with Hungarian heurisitc")
    rbfs.rbfs_run(board.gen_default_board(args.map_choice), "HU")

    print("Running MCTS")
    mcts.run_mcts(board.gen_default_board(args.map_choice))


if __name__ == "__main__":
    main()