Example no. 1
 def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
     # Setup required for unit tests.
     print("Unit testing CWD:", os.getcwd())
     self.config = DotDict.from_json("../Configurations/ModelConfigs/MuzeroBoard.json")
     self.g = HexGame(self.hex_board_size)
     self.net = HexNet(self.g, self.config.net_args)
     self.mcts = MuZeroMCTS(self.g, self.net, self.config.args)
Example no. 2
def game_from_name(name: str):
    """
    Factory function that yields a Game instance based on a query string.
    :param name: str The name/key of the environment to train on.
    :return: Game An instance of Game that contains the environment logic.
    """
    match_name = name.lower()

    if match_name == "hex":
        return HexGame(BOARD_SIZE)

    elif match_name == "tictactoe":
        return TicTacToeGame(BOARD_SIZE)

    elif match_name == "othello":
        return OthelloGame(BOARD_SIZE)

    elif match_name == "gym" or match_name == "cartpole":
        return GymGame("CartPole-v1")

    elif match_name == "pendulum":

        def discretize_wrapper(env):
            return DiscretizeAction(env, 15)

        return GymGame("Pendulum-v0", [discretize_wrapper])

    elif match_name.startswith("gym_"):
        return GymGame(name[len("gym_"):])

    elif match_name.startswith("atari_"):
        game_name = match_name[len("atari_"):]
        game_name = game_name.capitalize() + "NoFrameskip-v4"
        return AtariGame(game_name)
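
A minimal usage sketch of the dispatch above, assuming BOARD_SIZE and the game classes from this repository are importable; the chosen environment names are only illustrative:

# Hypothetical usage of game_from_name (environment names are illustrative).
game = game_from_name("hex")                  # -> HexGame(BOARD_SIZE)
game = game_from_name("gym_LunarLander-v2")   # -> GymGame("LunarLander-v2"); original casing is preserved
game = game_from_name("atari_breakout")       # -> AtariGame("BreakoutNoFrameskip-v4")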
Example no. 3
 def __init__(self, exploration, budget, model, boardsize, temperature=0):
     """
     The AlphaZero algorithm must be configured with the parameters of the
     PUCT formula for child-node selection and expansion, along with a search budget.
     The neural network must also be provided, together with the board size, in order
     to initialize the MCTS backend of AlphaZero.
     :param exploration: float The exploration parameter of PUCT (c_puct).
     :param budget: int The number of MCTS simulations to perform (N).
     :param model: AlphaZeroModel.Model An AlphaZeroModel backend that guides MCTS's search.
     :param boardsize: int Size of the game board in order to initialize MCTS.
     :param temperature: int Governs the degree of exploration (0 = greedy).
     """
     super().__init__()
     self.args = dotdict({'numMCTSSims': budget, 'cpuct': exploration})
     self.model = model
     self.game = HexGame(boardsize)
     self.searcher = MCTS(self.game, self.model, self.args)
     self.temperature = temperature  # temp=0 implies greedy actions
Example no. 4
class AlphaZeroPolicy(Policy):
    """
    Derived class of Policy to choose actions based on the AlphaZero Algorithm.
    """
    def __init__(self, exploration, budget, model, boardsize, temperature=0):
        """
        The AlphaZero algorithm must be configured with the parameters of the
        PUCT formula for child-node selection and expansion, along with a search budget.
        The neural network must also be provided, together with the board size, in order
        to initialize the MCTS backend of AlphaZero.
        :param exploration: float The exploration parameter of PUCT (c_puct).
        :param budget: int The number of MCTS simulations to perform (N).
        :param model: AlphaZeroModel.Model An AlphaZeroModel backend that guides MCTS's search.
        :param boardsize: int Size of the game board in order to initialize MCTS.
        :param temperature: int Governs the degree of exploration (0 = greedy).
        """
        super().__init__()
        self.args = dotdict({'numMCTSSims': budget, 'cpuct': exploration})
        self.model = model
        self.game = HexGame(boardsize)
        self.searcher = MCTS(self.game, self.model, self.args)
        self.temperature = temperature  # temp=0 implies greedy actions

    def generate_move(self, hex_board):
        """
        Given the configured AlphaZero parameters and the current game state
        (hex_board), call the backend MCTS class to perform a forward search
        guided by the neural network and choose a move for the current player.
        :param hex_board: HexBoard Class for game-logic.
        :return: tuple Coordinate on the HexBoard to move to.
        :see: MCTSSearcher from .hex_search
        """
        self.calls += 1
        if self.perspective is None:
            raise Exception("No search/player perspective given.")

        # The neural network (AlphaZeroModel) only learns in a uniform player perspective.
        # Hence we first alter the symmetry of the board according to the current player.
        # The returned move is then transformed back to the perspective of the
        # board that was provided as the function's argument.
        search_board = self.game.getCanonicalForm(np.copy(hex_board.board),
                                                  self.perspective)
        pi = self.searcher.runMCTS(search_board, temp=self.temperature)
        move_idx = np.argmax(pi)

        move = (move_idx // hex_board.size, move_idx % hex_board.size)
        if self.perspective == -1:  # Canonical form finds a move on a transposed board.
            move = move[::-1]

        return move
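
A minimal usage sketch of the policy above, assuming a trained AlphaZero model and the repository's HexBoard class; the variable names and the direct assignment to perspective are illustrative assumptions:

# Hypothetical usage (trained_model and board are assumed to exist; assigning
# perspective directly mirrors the attribute checked in generate_move).
policy = AlphaZeroPolicy(exploration=1.0, budget=100, model=trained_model, boardsize=5)
policy.perspective = 1                    # search from player 1's point of view
row, col = policy.generate_move(board)    # board is a HexBoard instance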
Example no. 5
def game_from_name(name):
    match_name = name.lower()

    if match_name == "hex":
        return HexGame(BOARD_SIZE)
    elif match_name == "tictactoe":
        return TicTacToeGame(BOARD_SIZE)
    elif match_name == "othello":
        return OthelloGame(BOARD_SIZE)
    elif match_name == "gym" or match_name == "cartpole":
        return GymGame("CartPole-v1")
    elif match_name == "pendulum":
        def discretize_wrapper(env):
            return DiscretizeAction(env, 15)

        return GymGame("Pendulum-v0", [discretize_wrapper])
    elif match_name.startswith("gym_"):
        return GymGame(name[len("gym_"):])
    elif match_name.startswith("atari_"):
        game_name = match_name[len("atari_"):]
        game_name = game_name.capitalize() + "NoFrameskip-v4"
        return AtariGame(game_name)
Example no. 6
class TestHexMuZero(unittest.TestCase):
    """
    Unit testing class to test whether the search engine exhibits well-defined behaviour.
    This includes scenarios where either the model or the inputs are faulty (empty observations,
    constant predictions, NaNs/infs in observations).
    """
    hex_board_size: int = 5

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Setup required for unit tests.
        print("Unit testing CWD:", os.getcwd())
        self.config = DotDict.from_json("../Configurations/ModelConfigs/MuzeroBoard.json")
        self.g = HexGame(self.hex_board_size)
        self.net = HexNet(self.g, self.config.net_args)
        self.mcts = MuZeroMCTS(self.g, self.net, self.config.args)

    def test_empty_input(self):
        """
        Tests the following scenarios:
         - Assert that observation tensors with only zeros are encoded to finite values (can be zero)
         - Assert that latent state tensors with only zeros are transitioned to finite values (can be zero)
        """
        # Build the environment state and an initial observation.
        s = self.g.getInitialState()
        o_t = self.g.buildObservation(s, player=1, form=self.g.Representation.HEURISTIC)
        h = GameHistory()

        # Build empty observations
        h.capture(o_t, -1, 1, np.array([]), 0, 0)
        stacked = h.stackObservations(self.net.net_args.observation_length, o_t)
        zeros_like = np.zeros_like(stacked)

        # Check that no NaNs/infs are produced
        latent, _, _ = self.net.initial_inference(zeros_like)
        self.assertTrue(np.isfinite(latent).all())

        # Exhaustively ensure that all possible dynamics function inputs lead to finite values.
        latent_forwards = [self.net.recurrent_inference(latent, action)[1] for action in range(self.g.getActionSize())]
        self.assertTrue(np.isfinite(np.array(latent_forwards)).all())

    def test_search_recursion_error(self):
        """
        The main phenomenon this test attempts to find is the following:
        Let s be the current latent state, s = [0, 0, 0], along with action a = 1.
        If we fetch the next latent state for (s, a), we do not want to get s' == s = [0, 0, 0].
        s' is a new state, yet it is already present in the transition table because it is identical to s.
        If action a = 1 is then chosen again by UCB, this could result in infinite recursion.

        Tests the following scenarios:
         - Assert that MuMCTS does not result in a recursion error when called with the same
           input multiple times without clearing the tree.
         - Assert that MuMCTS does not result in a recursion error when inputs are either zero
           or random.
         - Assert that MuMCTS does not result in a recursion error when only one root action is legal.
        """
        rep = 30  # Repetition factor --> should be high.

        # Build the environment state and an initial observation.
        s = self.g.getInitialState()
        o_t = self.g.buildObservation(s, player=1, form=self.g.Representation.HEURISTIC)
        h = GameHistory()

        # Build empty and random observation tensors
        h.capture(o_t, -1, 1, np.array([]), 0, 0)
        stacked = h.stackObservations(self.net.net_args.observation_length, o_t)
        zeros_like = np.zeros_like(stacked)
        random_like = np.random.rand(*zeros_like.shape)

        # Build root state legal action masks
        legals = np.ones(self.g.getActionSize())
        same = np.zeros_like(legals)
        same[0] = 1  # Can only do one move

        # Execute multiple MCTS runs that will result in recurring tree paths.
        for _ in range(rep):
            self.mcts.runMCTS(zeros_like, legals)  # Empty observations ALL moves at the root
        self.mcts.clear_tree()

        for _ in range(rep):
            self.mcts.runMCTS(zeros_like, same)  # Empty observations ONE move at the root
        self.mcts.clear_tree()

        for _ in range(rep):
            self.mcts.runMCTS(random_like, legals)  # Random observations ALL moves at the root
        self.mcts.clear_tree()

        for _ in range(rep):
            self.mcts.runMCTS(random_like, same)  # Random observations ONE move at the root
        self.mcts.clear_tree()

    def test_search_border_cases_latent_state(self):
        """
        Tests the following scenarios:
        - Assert that observation tensors with only infinities or nans result in finite tensors (zeros).
          Testing this phenomenon ensures that bad input is not propagated for more than one step.
          Note that one forward step using bad inputs can already lead to a recursion error in MuMCTS.
          see test_search_recursion_error
       """
        # Build the environment state and an initial observation.
        s = self.g.getInitialState()
        o_t = self.g.buildObservation(s, player=1, form=self.g.Representation.HEURISTIC)
        h = GameHistory()

        # Build observation tensors filled with NaNs and infinities
        h.capture(o_t, -1, 1, np.array([]), 0, 0)
        stacked = h.stackObservations(self.net.net_args.observation_length, o_t)
        nans_like = np.zeros_like(stacked)
        inf_like = np.zeros_like(stacked)

        nans_like[nans_like == 0] = np.nan
        inf_like[inf_like == 0] = np.inf

        # Check that NaN/inf inputs are encoded to finite latent states
        nan_latent, _, _ = self.net.initial_inference(nans_like)
        inf_latent, _, _ = self.net.initial_inference(inf_like)

        self.assertTrue(np.isfinite(nan_latent).all())
        self.assertTrue(np.isfinite(inf_latent).all())

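        # Re-poison the (now finite) latent tensors with NaNs/infs to verify that
        # the dynamics function also suppresses bad inputs.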
        nan_latent[nan_latent == 0] = np.nan
        inf_latent[inf_latent == 0] = np.inf

        # Exhaustively ensure that all possible dynamics function inputs lead to finite values.
        nan_latent_forwards = [self.net.recurrent_inference(nan_latent, action)[1] for action in range(self.g.getActionSize())]
        inf_latent_forwards = [self.net.recurrent_inference(inf_latent, action)[1] for action in range(self.g.getActionSize())]

        self.assertTrue(np.isfinite(np.array(nan_latent_forwards)).all())
        self.assertTrue(np.isfinite(np.array(inf_latent_forwards)).all())

    def test_ill_conditioned_model(self):
        """
        Re-run the recursion-error unit test of this class using a model that produces
        badly conditioned outputs, i.e., all-zero latent states and random policies.
        """

        class DumbModel(HexNet):

            def initial_inference(self, observations: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray, float]:
                s, pi, v = super().initial_inference(observations)
                return np.zeros_like(s), np.random.uniform(size=len(pi)), 0

            def recurrent_inference(self, latent_state: np.ndarray, action: int) -> typing.Tuple[float, np.ndarray, np.ndarray, float]:
                r, s, pi, v = super().recurrent_inference(latent_state, action)
                return 0, np.zeros_like(latent_state), np.random.uniform(size=len(pi)), 0

        memory_net = self.net
        memory_search = self.mcts

        # Swap class variables
        self.net = DumbModel(self.g, self.config.net_args)
        self.mcts = MuZeroMCTS(self.g, self.net, self.config.args)

        self.test_search_recursion_error()

        # Undo class variables swap
        self.net = memory_net
        self.mcts = memory_search

    def test_combined_model(self):
        # The prediction and dynamics model can be combined into one computation graph.
        # This should be faster than calling the implementations separately. This test makes
        # sure that the output is still the same, and also shows the time difference.

        batch = 128
        dim = self.g.getDimensions()

        latent_planes = np.random.uniform(size=(batch, dim[0], dim[1]))
        actions = np.floor(np.random.uniform(size=batch) * dim[0] * dim[1])
        actions = actions.astype(int)

        recurrent_inputs = list(zip(latent_planes, actions))

        # This line is just for warm-up, otherwise the timing is unfair.
        combined_results = [self.net.recurrent_inference(latent, a) for latent, a in recurrent_inputs]

        t0 = time.time()
        combined_results = [self.net.recurrent_inference(latent, a) for latent, a in recurrent_inputs]
        t1 = time.time()
        combined_time = t1 - t0

        dynamics_results = [self.net.forward(latent, a) for latent, a in recurrent_inputs]
        predict_results = [self.net.predict(dyn[1]) for dyn in dynamics_results]

        t0 = time.time()
        dynamics_results = [self.net.forward(latent, a) for latent, a in recurrent_inputs]
        predict_results = [self.net.predict(dyn[1]) for dyn in dynamics_results]
        t1 = time.time()
        separate_time = t1 - t0

        print(f"Combined: {combined_time}. Separate: {separate_time}")

        # unzip results
        combined_results = list(zip(*combined_results))
        dynamics_results = list(zip(*dynamics_results))
        predict_results = list(zip(*predict_results))

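        # The combined graph's (reward, latent) outputs should match the separate
        # forward() call, and its (pi, v) outputs should match predict().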
        np.testing.assert_array_almost_equal(combined_results[0], dynamics_results[0])
        np.testing.assert_array_almost_equal(combined_results[1], dynamics_results[1])
        np.testing.assert_array_almost_equal(combined_results[2], predict_results[0])
        np.testing.assert_array_almost_equal(combined_results[3], predict_results[1])
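
A minimal sketch for running this suite, assuming it is launched from a directory where the relative configuration path used in __init__ resolves:

# Standard unittest entry point (sketch; note the working-directory assumption above).
if __name__ == '__main__':
    unittest.main()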