def __init__(self, *args, **kwargs):
    """
    Prepare the fixtures shared by the unit tests: configuration, game, network and search.

    :param args: Positional arguments forwarded unchanged to the parent initializer.
    :param kwargs: Keyword arguments forwarded unchanged to the parent initializer.
    """
    super().__init__(*args, **kwargs)

    # Print the working directory: the configuration path below is relative
    # (presumably resolved against the CWD -- confirm in DotDict.from_json).
    print("Unit testing CWD:", os.getcwd())

    config_file = "../Configurations/ModelConfigs/MuzeroBoard.json"
    self.config = DotDict.from_json(config_file)

    # Game first, then the network that needs the game, then the search that needs both.
    self.g = HexGame(self.hex_board_size)
    self.net = HexNet(self.g, self.config.net_args)
    self.mcts = MuZeroMCTS(self.g, self.net, self.config.args)
def game_from_name(name: str):
    """
    Constructor function to yield a Game class by a query string.

    Matching is case-insensitive. Recognised names are "hex", "tictactoe", "othello",
    "gym"/"cartpole", "pendulum", and any name prefixed with "gym_" or "atari_".

    :param name: str Represents the name/ key of the environment to train on.
    :return: Game Instance of Game that contains the environment logic.
    :raises ValueError: If name does not match any known environment. Previously the
                        function fell off the end and silently returned None.
    """
    match_name = name.lower()

    if match_name == "hex":
        return HexGame(BOARD_SIZE)
    if match_name == "tictactoe":
        return TicTacToeGame(BOARD_SIZE)
    if match_name == "othello":
        return OthelloGame(BOARD_SIZE)
    if match_name in ("gym", "cartpole"):
        return GymGame("CartPole-v1")
    if match_name == "pendulum":
        # The second argument (15) is passed straight to DiscretizeAction;
        # presumably the number of discrete actions -- confirm against DiscretizeAction.
        def discretize_wrapper(env):
            return DiscretizeAction(env, 15)

        return GymGame("Pendulum-v0", [discretize_wrapper])
    if match_name.startswith("gym_"):
        # Slice the ORIGINAL string: the lower-cased copy is only used for matching,
        # so the environment id keeps the casing given by the caller.
        return GymGame(name[len("gym_"):])
    if match_name.startswith("atari_"):
        # NOTE(review): the id is built from the lower-cased name, so only the first
        # letter ends up capitalised (inner capitals are lost) -- confirm this is intended.
        game_name = match_name[len("atari_"):]
        game_name = game_name.capitalize() + "NoFrameskip-v4"
        return AtariGame(game_name)

    raise ValueError(f"Unknown game name: {name!r}")
def __init__(self, exploration, budget, model, boardsize, temperature=0):
    """
    Configure the AlphaZero policy: PUCT parameters, search budget, guiding network
    and the Hex game used to initialize the MCTS backend.

    :param exploration: float The exploration parameter of PUCT (c_puct).
    :param budget: int The amount of MCTS simulations to perform (N).
    :param model: AlphaZeroModel.Model Neural network backend that guides the MCTS search.
    :param boardsize: int Size of the game board in order to initialize MCTS.
    :param temperature: int Governs the degree of exploration. (0 = greedy)
    """
    super().__init__()

    # Search hyperparameters under the keys that are handed to MCTS.
    search_settings = {'numMCTSSims': budget, 'cpuct': exploration}
    self.args = dotdict(search_settings)

    self.model = model
    self.game = HexGame(boardsize)
    self.searcher = MCTS(self.game, self.model, self.args)

    # A temperature of 0 implies greedy actions.
    self.temperature = temperature
class AlphaZeroPolicy(Policy):
    """
    Policy subclass that selects actions with the AlphaZero algorithm
    (MCTS guided by a neural network).
    """

    def __init__(self, exploration, budget, model, boardsize, temperature=0):
        """
        Configure the AlphaZero policy: PUCT parameters, search budget, guiding network
        and the Hex game used to initialize the MCTS backend.

        :param exploration: float The exploration parameter of PUCT (c_puct).
        :param budget: int The amount of MCTS simulations to perform (N).
        :param model: AlphaZeroModel.Model Neural network backend that guides the MCTS search.
        :param boardsize: int Size of the game board in order to initialize MCTS.
        :param temperature: int Governs the degree of exploration. (0 = greedy)
        """
        super().__init__()

        # Search hyperparameters under the keys that are handed to MCTS.
        search_settings = {'numMCTSSims': budget, 'cpuct': exploration}
        self.args = dotdict(search_settings)

        self.model = model
        self.game = HexGame(boardsize)
        self.searcher = MCTS(self.game, self.model, self.args)

        # A temperature of 0 implies greedy actions.
        self.temperature = temperature

    def generate_move(self, hex_board):
        """
        Choose a move for the current player on the given game-state by running the
        network-guided MCTS backend and taking the highest-probability action.

        :param hex_board: HexBoard Class for game-logic.
        :return: tuple Coordinate on the HexBoard to move to.
        :raises Exception: If no player perspective has been set on this policy.
        :see: MCTSSearcher from .hex_search
        """
        # Bookkeeping counter (presumably initialised by the Policy base class).
        self.calls += 1

        if self.perspective is None:
            raise Exception("No search/ player perspective given.")

        # The network only learns from one uniform player perspective, so the search
        # runs on a copy of the board converted to canonical form for this player.
        board_snapshot = np.copy(hex_board.board)
        canonical_board = self.game.getCanonicalForm(board_snapshot, self.perspective)
        pi = self.searcher.runMCTS(canonical_board, temp=self.temperature)

        # Decode the flat action index into a (row, column) pair.
        row, col = divmod(np.argmax(pi), hex_board.size)

        if self.perspective == -1:
            # Canonical form finds a move on a transposed board: swap the coordinates
            # back to the perspective of the board that was passed in.
            return col, row
        return row, col
def game_from_name(name: str):
    """
    Build the Game instance that corresponds to a query string.

    Matching is case-insensitive. Recognised names are "hex", "tictactoe", "othello",
    "gym"/"cartpole", "pendulum", and any name prefixed with "gym_" or "atari_".

    :param name: str Represents the name/ key of the environment.
    :return: Game Instance of the game selected by name.
    :raises ValueError: If name does not match any known environment. Previously the
                        function fell off the end and silently returned None.
    """
    match_name = name.lower()

    if match_name == "hex":
        return HexGame(BOARD_SIZE)
    if match_name == "tictactoe":
        return TicTacToeGame(BOARD_SIZE)
    if match_name == "othello":
        return OthelloGame(BOARD_SIZE)
    if match_name in ("gym", "cartpole"):
        return GymGame("CartPole-v1")
    if match_name == "pendulum":
        # The second argument (15) is passed straight to DiscretizeAction;
        # presumably the number of discrete actions -- confirm against DiscretizeAction.
        def discretize_wrapper(env):
            return DiscretizeAction(env, 15)

        return GymGame("Pendulum-v0", [discretize_wrapper])
    if match_name.startswith("gym_"):
        # Slice the ORIGINAL string: the lower-cased copy is only used for matching,
        # so the environment id keeps the casing given by the caller.
        return GymGame(name[len("gym_"):])
    if match_name.startswith("atari_"):
        # NOTE(review): the id is built from the lower-cased name, so only the first
        # letter ends up capitalised (inner capitals are lost) -- confirm this is intended.
        game_name = match_name[len("atari_"):]
        game_name = game_name.capitalize() + "NoFrameskip-v4"
        return AtariGame(game_name)

    raise ValueError(f"Unknown game name: {name!r}")
class TestHexMuZero(unittest.TestCase):
    """
    Unit testing class to test whether the search engine exhibits well defined behaviour.
    This includes scenarios where either the model or inputs are faulty
    (empty observations, constant predictions, nans/ inf in observations).
    """
    hex_board_size: int = 5

    def __init__(self, *args, **kwargs):
        """
        Prepare the fixtures shared by the tests: configuration, game, network and search.

        :param args: Positional arguments forwarded unchanged to unittest.TestCase.
        :param kwargs: Keyword arguments forwarded unchanged to unittest.TestCase.
        """
        super().__init__(*args, **kwargs)

        # Setup required for unit tests. The CWD is printed because the
        # configuration path below is relative.
        print("Unit testing CWD:", os.getcwd())
        self.config = DotDict.from_json("../Configurations/ModelConfigs/MuzeroBoard.json")
        self.g = HexGame(self.hex_board_size)
        self.net = HexNet(self.g, self.config.net_args)
        self.mcts = MuZeroMCTS(self.g, self.net, self.config.args)

    def _stacked_initial_observation(self):
        """
        Build the stacked observation of the initial game state.

        Shared by the tests below, which only use the result as a template
        (via np.zeros_like) for their own zero/ random/ nan/ inf inputs.

        :return: Result of GameHistory.stackObservations for the initial state.
        """
        # Build the environment for an observation.
        s = self.g.getInitialState()
        o_t = self.g.buildObservation(s, player=1, form=self.g.Representation.HEURISTIC)
        h = GameHistory()

        # Capture a single step so that the history can be stacked.
        h.capture(o_t, -1, 1, np.array([]), 0, 0)
        return h.stackObservations(self.net.net_args.observation_length, o_t)

    def test_empty_input(self):
        """
        Tests the following scenarios:
         - Assert that observation tensors with only zeros are encoded to finite values (can be zero)
         - Assert that latent state tensors with only zeros are transitioned to finite values (can be zero)
        """
        # Build empty observations
        zeros_like = np.zeros_like(self._stacked_initial_observation())

        # Check if nans are produced
        latent, _, _ = self.net.initial_inference(zeros_like)
        self.assertTrue(np.isfinite(latent).all())

        # Exhaustively ensure that all possible dynamics function inputs lead to finite values.
        latent_forwards = [self.net.recurrent_inference(latent, action)[1]
                           for action in range(self.g.getActionSize())]
        self.assertTrue(np.isfinite(np.array(latent_forwards)).all())

    def test_search_recursion_error(self):
        """
        The main phenomenon this test attempts to find is:
        Let s be the current latent state, s = [0, 0, 0], along with action a = 1.
        If we fetch the next latent state with (s, a) we do not want to get, s' == s = [0, 0, 0].
        s' is a new state, although it is present in the transition table due to being identical to s.
        if action a = 1 is chosen again by UCB, then this could result in infinite recursion.

        Tests the following scenarios:
         - Assert that MuMCTS does not result in a recursion error when called with the same
           input multiple times without clearing the tree.
         - Assert that MuMCTS does not result in a recursion error when inputs are either zero
           or random.
         - Assert that MuMCTS does not result in a recursion error when only one root action is legal.
        """
        rep = 30  # Repetition factor --> should be high.

        # Build empty and random observations tensors
        zeros_like = np.zeros_like(self._stacked_initial_observation())
        random_like = np.random.rand(*zeros_like.shape)

        # Build root state legal action masks
        legals = np.ones(self.g.getActionSize())
        same = np.zeros_like(legals)
        same[0] = 1  # Can only do one move

        scenarios = (
            (zeros_like, legals),   # Empty observations, ALL moves legal at the root
            (zeros_like, same),     # Empty observations, ONE move legal at the root
            (random_like, legals),  # Random observations, ALL moves legal at the root
            (random_like, same),    # Random observations, ONE move legal at the root
        )

        # Execute multiple MCTS runs that will result in recurring tree paths.
        # The tree is only cleared between scenarios, not between repetitions.
        for observation, legal_mask in scenarios:
            for _ in range(rep):
                self.mcts.runMCTS(observation, legal_mask)
            self.mcts.clear_tree()

    def test_search_border_cases_latent_state(self):
        """
        Tests the following scenarios:
         - Assert that observation tensors with only infinities or nans result in finite tensors (zeros).
           Testing this phenomenon ensures that bad input is not propagated for more than one step.
           Note that one forward step using bad inputs can already lead to a recursion error in MuMCTS.
           see test_search_recursion_error
        """
        stacked = self._stacked_initial_observation()

        # Build observations consisting only of nans/ infinities.
        nans_like = np.zeros_like(stacked)
        inf_like = np.zeros_like(stacked)

        nans_like[nans_like == 0] = np.nan
        inf_like[inf_like == 0] = np.inf

        # Check if nans are produced
        nan_latent, _, _ = self.net.initial_inference(nans_like)
        inf_latent, _, _ = self.net.initial_inference(inf_like)

        self.assertTrue(np.isfinite(nan_latent).all())
        self.assertTrue(np.isfinite(inf_latent).all())

        # Re-poison every zero entry of the (finite) latent states in place.
        nan_latent[nan_latent == 0] = np.nan
        inf_latent[inf_latent == 0] = np.inf

        # Exhaustively ensure that all possible dynamics function inputs lead to finite values.
        nan_latent_forwards = [self.net.recurrent_inference(nan_latent, action)[1]
                               for action in range(self.g.getActionSize())]
        inf_latent_forwards = [self.net.recurrent_inference(inf_latent, action)[1]
                               for action in range(self.g.getActionSize())]

        self.assertTrue(np.isfinite(np.array(nan_latent_forwards)).all())
        self.assertTrue(np.isfinite(np.array(inf_latent_forwards)).all())

    def test_ill_conditioned_model(self):
        """
        Re-run test_search_recursion_error using a degenerate model that always returns
        all-zero latent states, uniformly random policies and zero values/ rewards.
        """

        class DumbModel(HexNet):
            """ HexNet whose outputs are replaced by zeros (states, scalars) and noise (policy). """

            def initial_inference(self, observations: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray, float]:
                # The real network is only called to obtain correctly shaped outputs.
                s, pi, _ = super().initial_inference(observations)
                return np.zeros_like(s), np.random.uniform(size=len(pi)), 0

            def recurrent_inference(self, latent_state: np.ndarray, action: int) -> \
                    typing.Tuple[float, np.ndarray, np.ndarray, float]:
                # The real network is only called to obtain the policy length.
                _, _, pi, _ = super().recurrent_inference(latent_state, action)
                return 0, np.zeros_like(latent_state), np.random.uniform(size=len(pi)), 0

        memory_net = self.net
        memory_search = self.mcts

        # Swap class variables
        self.net = DumbModel(self.g, self.config.net_args)
        self.mcts = MuZeroMCTS(self.g, self.net, self.config.args)

        try:
            self.test_search_recursion_error()
        finally:
            # Undo class variables swap, also when the nested test fails.
            self.net = memory_net
            self.mcts = memory_search

    def test_combined_model(self):
        """
        Check that the combined recurrent inference matches calling dynamics and
        prediction separately, and print the time taken by both variants.
        """
        # The prediction and dynamics model can be combined into one computation graph.
        # This should be faster than calling the implementations separately. This test makes
        # sure that the output is still the same, and also shows the time difference.
        batch = 128
        dim = self.g.getDimensions()
        latent_planes = np.random.uniform(size=(batch, dim[0], dim[1]))
        actions = np.floor(np.random.uniform(size=batch) * dim[0] * dim[1])
        actions = actions.astype(int)
        recurrent_inputs = list(zip(latent_planes, actions))

        # This line is just for warm-up, otherwise the timing is unfair.
        combined_results = [self.net.recurrent_inference(latent, a) for latent, a in recurrent_inputs]

        # perf_counter is monotonic and high resolution, which suits interval timing.
        t0 = time.perf_counter()
        combined_results = [self.net.recurrent_inference(latent, a) for latent, a in recurrent_inputs]
        t1 = time.perf_counter()
        combined_time = t1 - t0

        # Warm-up for the separate implementation as well.
        dynamics_results = [self.net.forward(latent, a) for latent, a in recurrent_inputs]
        predict_results = [self.net.predict(dyn[1]) for dyn in dynamics_results]

        t0 = time.perf_counter()
        dynamics_results = [self.net.forward(latent, a) for latent, a in recurrent_inputs]
        predict_results = [self.net.predict(dyn[1]) for dyn in dynamics_results]
        t1 = time.perf_counter()
        separate_time = t1 - t0

        print(f"Combined: {combined_time}. Separate: {separate_time}")

        # unzip results
        combined_results = list(zip(*combined_results))
        dynamics_results = list(zip(*dynamics_results))
        predict_results = list(zip(*predict_results))

        # The combined output is compared element-wise against the two outputs of
        # forward() followed by the two outputs of predict().
        np.testing.assert_array_almost_equal(combined_results[0], dynamics_results[0])
        np.testing.assert_array_almost_equal(combined_results[1], dynamics_results[1])
        np.testing.assert_array_almost_equal(combined_results[2], predict_results[0])
        np.testing.assert_array_almost_equal(combined_results[3], predict_results[1])