Example 1
def play_game(config: MuZeroConfig, network: Network) -> Game:

    game = config.new_game()

    while not game.terminal() and len(game.history) < config.max_moves:

        # create a new starting point for MCTS
        root = Node(0)
        current_observation = game.make_image(-1)

        root.expand_node(game.to_play(), game.legal_actions(),
                         network.initial_inference(current_observation))
        root.add_exploration_noise()

        # carry out the MCTS search
        run_mcts(config, root, game.action_history(), network)
        
        T = config.visit_softmax_temperature(
            num_moves=len(game.history),
            training_steps=network.training_steps())

        # first action from the MCTS with some extra exploration;
        # c1 is the child node reached by the selected action
        action, c1 = root.select_action_with_temperature(T, epsilon=config.epsilon)
        game.apply(action)
        game.store_search_statistics(root) 
        
        # continue using actions as predicted by MCTS
        # (minimise exploration for these)
        ct = 1
        while not game.terminal() and ct < config.prediction_steps:
            action, c1 = c1.select_action_with_temperature(1)
            game.apply(action)
            game.store_search_statistics(c1)
            ct += 1
        
    return game
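
Example 1 relies on a select_action_with_temperature helper that is not shown. Below is a minimal sketch of what such a Node method could look like, assuming each child stores a visit_count and that the method returns both the chosen action and the corresponding child node (the c1 above); the epsilon-uniform mixing and the attribute names are assumptions, not the original implementation.

import numpy as np


class Node:
    def __init__(self, prior):
        self.prior = prior
        self.visit_count = 0
        self.children = {}  # maps action -> child Node

    def select_action_with_temperature(self, T, epsilon=0.0):
        # Sample an action in proportion to visit_count ** (1 / T) (T > 0),
        # then mix in a uniform component controlled by epsilon for extra
        # exploration. Returns the action and the corresponding child node.
        actions = list(self.children.keys())
        counts = np.array([self.children[a].visit_count for a in actions],
                          dtype=np.float64)
        probs = counts ** (1.0 / T)
        probs = probs / probs.sum()
        probs = (1.0 - epsilon) * probs + epsilon / len(actions)
        idx = np.random.choice(len(actions), p=probs)
        return actions[idx], self.children[actions[idx]]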
Example 2
    def play_game(self) -> Game:
        game = Game(self.config.discount)
        min_max_stats = MinMaxStats(self.config.known_bounds)

        # Use Exponential Decay to reduce temperature over time
        temperature = max(
            self.temperature * (1 - self.config.temperature_decay_factor)**
            self.network.training_steps(), self.config.temperature_min)
        self.metrics_temperature(temperature)

        while not game.terminal() and len(
                game.history) < self.config.max_moves:

            # At the root of the search tree we use the representation function to
            # obtain a hidden state given the current observation.
            root = Node(0)
            current_observation = game.get_observation_from_index(-1)
            network_output = self.network.initial_inference(
                current_observation)
            expand_node(root, game.to_play(), game.legal_actions(),
                        network_output)
            backpropagate([root], network_output.value, game.to_play(),
                          self.config.discount, min_max_stats)
            add_exploration_noise(self.config, root)

            # We then run a Monte Carlo Tree Search using only action sequences and the
            # model learned by the network.
            run_mcts(self.config, root, game.action_history(), self.network,
                     min_max_stats)
            action = select_action(root, temperature)
            game.apply(action)
            game.store_search_statistics(root)

        return game
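
Example 2 threads a MinMaxStats object through the search so that value estimates can be normalized against the known score bounds of the game. A minimal sketch of such a normalizer, modeled on the published MuZero pseudocode (the known_bounds argument is assumed to expose optional min/max fields):

class MinMaxStats:
    """Tracks the minimum and maximum values seen in the search tree."""

    def __init__(self, known_bounds=None):
        self.minimum = known_bounds.min if known_bounds else float('inf')
        self.maximum = known_bounds.max if known_bounds else -float('inf')

    def update(self, value):
        self.minimum = min(self.minimum, value)
        self.maximum = max(self.maximum, value)

    def normalize(self, value):
        # Only rescale once genuine bounds have been observed.
        if self.maximum > self.minimum:
            return (value - self.minimum) / (self.maximum - self.minimum)
        return value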
Example 3
    def _step_callback(self):
        '''Callback function for button that steps through the game world'''
        if not self.game.terminal:
            root = Node(0)
            current_observation = self.game.make_image(-1, self.network.device)
            expand_node(root, self.game.to_play(), self.game.legal_actions(),
                        self.network.initial_inference(current_observation))
            add_exploration_noise(self.config, root)

            # We then run a Monte Carlo Tree Search using only action sequences and the
            # model learned by the network.
            run_mcts(self.config, root, self.game.action_history(),
                     self.network)
            action = select_action(self.config, len(self.game.history), root)
            self.game.apply(action)
            self.game.store_search_statistics(root)
        self.draw_area.draw()
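
Examples 2, 3 and 5 call a free-standing expand_node helper. The following is a sketch of what it typically does in MuZero-style code, following the structure of the published pseudocode; the exact Node and NetworkOutput fields, and indexing policy_logits directly by action, are assumptions here.

import math


def expand_node(node, to_play, legal_actions, network_output):
    # Attach the network prediction to the node being expanded.
    node.to_play = to_play
    node.hidden_state = network_output.hidden_state
    node.reward = network_output.reward
    # Normalized softmax over the logits of the legal actions only;
    # each legal action gets a child node carrying its prior probability.
    policy = {a: math.exp(network_output.policy_logits[a]) for a in legal_actions}
    policy_sum = sum(policy.values())
    for action, p in policy.items():
        node.children[action] = Node(p / policy_sum)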
Example 4
def play_game(config: Config, network: Network):

	game = Game()

	while not game.terminal() and len(game.history) < config.max_moves:

		action, root = run_mcts(config, game, network)
		game.apply(action)
		game.store_search_statistics(root)

	return game

Example 5

def play_game(config, network, train):
    """
    Each game is produced by starting at the initial board position, then
    repeatedly executing a Monte Carlo Tree Search to generate moves until the end
    of the game is reached.
    """
    game = config.new_game()

    game_history = GameHistory()
    observation = game.reset()
    game_history.apply(0, observation, 0)

    while not game.terminal() and len(
            game_history.action_history) < config.max_moves:
        # At the root of the search tree we use the representation function to
        # obtain a hidden state given the current observation.
        root = Node(0)
        current_observation = game_history.make_image(-1)
        current_observation = torch.tensor(current_observation).float().unsqueeze(0)

        expand_node(config, root, game.to_play(), game.legal_actions(),
                    network.initial_inference(current_observation))
        if train:
            add_exploration_noise(config, root)

        # We then run a Monte Carlo Tree Search using only action sequences and the
        # model learned by the networks.
        run_mcts(config, root, game, network)
        action = select_action(config, len(game_history.action_history), root,
                               train)

        observation, reward = game.step(action)
        game_history.store_search_statistics(root, config.action_space)
        game_history.apply(action, observation, reward)

    game.close()

    return game_history
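
Example 5 records the search result via GameHistory.store_search_statistics(root, action_space). A hedged sketch of what such a method might store, assuming the history keeps lists of root values and of policy targets built from the root's child visit counts, and that the root node exposes a value() helper; all attribute names are illustrative.

class GameHistory:
    def __init__(self):
        self.action_history = []
        self.observation_history = []
        self.reward_history = []
        self.root_values = []
        self.child_visits = []

    def apply(self, action, observation, reward):
        self.action_history.append(action)
        self.observation_history.append(observation)
        self.reward_history.append(reward)

    def store_search_statistics(self, root, action_space):
        # Turn the root's child visit counts into a policy target over the
        # full action space; actions never visited get probability zero.
        total = sum(child.visit_count for child in root.children.values())
        self.child_visits.append([
            root.children[a].visit_count / total if a in root.children else 0.0
            for a in action_space
        ])
        # Assumes the root exposes a value() helper (mean backed-up value).
        self.root_values.append(root.value())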
Example 6
def play_game(config: MuZeroConfig, network: Network) -> Game:
    game = Game.from_config(config)

    while not game.terminal() and len(game.history) < config.max_moves:
        # At the root of the search tree we use the representation function to
        # obtain a hidden state given the current observation.
        root = Node(0)
        last_observation = game.make_image(-1)
        root.expand(game.to_play(), game.legal_actions(),
                    network.initial_inference(last_observation).numpy())
        root.add_exploration_noise(config)

        # logging.debug('Running MCTS on step {}.'.format(len(game.history)))
        # We then run a Monte Carlo Tree Search using only action sequences and the
        # model learned by the network.
        run_mcts(config, root, game.action_history(), network)
        action = root.select_action(config, len(game.history), network)
        game.apply(action)
        game.store_search_statistics(root)

    logging.info('Finished episode at step {} | cumulative reward: {}' \
        .format(len(game.obs_history), sum(game.rewards)))

    return game
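
All of the examples above perturb the root priors before searching (add_exploration_noise / root.add_exploration_noise). A sketch of that Dirichlet-noise step, following the published MuZero pseudocode; the root_dirichlet_alpha and root_exploration_fraction config fields come from that pseudocode and may be named differently in these codebases.

import numpy as np


def add_exploration_noise(config, node):
    # Mix Dirichlet noise into the prior of every root child so the search
    # occasionally explores actions the network currently assigns low prior to.
    actions = list(node.children.keys())
    noise = np.random.dirichlet([config.root_dirichlet_alpha] * len(actions))
    frac = config.root_exploration_fraction
    for action, n in zip(actions, noise):
        child = node.children[action]
        child.prior = child.prior * (1 - frac) + n * frac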
Example 7
import argparse

import board
import ida
import mcts
import rbfs

parser = argparse.ArgumentParser(description='Solve Sokoban puzzles')
parser.add_argument("-m", "--map",
                    dest="map_choice",
                    default="boards/test_board1.txt",
                    help="Choose a map file (default: boards/test_board1.txt)")


args = parser.parse_args()
new_board = board.gen_default_board(args.map_choice)


print("Running IDA* with Manhattan Distance heuristic")
ida.ida_run(new_board, "MD")
print("Running IDA* with Hungarian heuristic")
new_board = board.gen_default_board(args.map_choice)
ida.ida_run(new_board, "HU")
print("Running RBFS with Manhattan Distance heuristic")
new_board = board.gen_default_board(args.map_choice)
rbfs.rbfs_run(new_board, "MD")
print("Running RBFS with Hungarian heuristic")
new_board = board.gen_default_board(args.map_choice)
rbfs.rbfs_run(new_board, "HU")
print("Running MCTS")
new_board = board.gen_default_board(args.map_choice)
mcts.run_mcts(new_board)