import copy

import numpy as np

# NOTE: the module paths, the initialize_game_state helper, and the concrete
# player value below are assumptions about the surrounding project layout;
# adjust them to match the actual package.
from agents.agent_mcts import MonteCarlo, MonteCarloNode
from game_utils import BoardPiece, GameState, get_valid_moves, initialize_game_state

player = BoardPiece(1)                    # player the tree searches for (assumed value)
initial_state = initialize_game_state()   # empty board fixture shared by all tests


def test_simulate():
    # simulate() from the root should always return either the winning
    # BoardPiece or a GameState (e.g. a draw).
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)
    key = hash(initial_state.tostring()) + hash(player)
    root = tree.nodes[key]
    for _ in range(1000):
        outcome = tree.simulate(root)
        assert isinstance(outcome, (BoardPiece, GameState))
def test_make_node():
    # make_node() should register a MonteCarloNode equivalent to one
    # constructed directly from the same board and player.
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)
    key = hash(initial_state.tostring()) + hash(player)
    assert isinstance(tree.nodes[key], MonteCarloNode)
    node1 = MonteCarloNode(initial_state, player)
    node2 = tree.nodes[key]
    for attribute, value in vars(node1).items():
        assert np.all(vars(node2)[attribute] == value)
def test_expand():
    # Each call to expand() should consume one unexpanded move and register a
    # correctly linked child node in the tree.
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)
    key = hash(initial_state.tostring()) + hash(player)
    root = tree.nodes[key]
    # iterate over a snapshot, since expand() is expected to shrink unexpanded_moves
    for _ in list(root.unexpanded_moves):
        child = tree.expand(root)
        assert isinstance(child, MonteCarloNode)
        assert child.last_move in root.legal_moves
        assert child.last_move in root.expanded_moves
        assert child.parent == root
        assert child.to_play == BoardPiece(player % 2 + 1)
        child_key = hash(child.board.tostring()) + hash(child.to_play)
        assert tree.nodes[child_key] == child
def test_run_search():
    # Every single-iteration call to run_search() should add exactly one play
    # to the root's statistics.
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)
    key = hash(initial_state.tostring()) + hash(player)
    root = tree.nodes[key]
    n_sims = 10
    for i in range(n_sims):
        tree.run_search(root.board, root.to_play, 1)
        assert root.n_plays == i + 1
def test_backpropagate():
    # backpropagate() should credit wins and plays to the child and keep the
    # root's totals equal to the sums over its children.
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)
    key = hash(initial_state.tostring()) + hash(player)
    root = tree.nodes[key]
    n_test_sims = 100
    for _ in root.legal_moves:
        wins = 0
        child = tree.expand(root)
        for _ in range(n_test_sims):
            outcome = tree.simulate(child)
            tree.backpropagate(child, outcome)
            if outcome == player:
                wins += 1
        assert child.n_wins == wins
        assert child.n_plays == n_test_sims
    assert root.n_plays == sum(c.n_plays for c in root.children.values())
    assert root.n_wins == sum(c.n_wins for c in root.children.values())
def test_best_play():
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)
    key = hash(initial_state.tostring()) + hash(player)
    root = tree.nodes[key]
    tree.run_search(root.board, root.to_play, n_sims=5000)
    # check that the best move is the child with the maximum n_plays
    scores = [root.get_child(a).n_plays for a in root.legal_moves]
    assert tree.best_play(
        root.board, root.to_play)[0] == root.legal_moves[np.argmax(scores)]
    # check that immediate winning moves are selected
    for c in get_valid_moves(initial_state):
        near_win = copy.deepcopy(initial_state)
        near_win[:3, c] = player  # three of player's pieces in column c: playing c wins
        tree = MonteCarlo(player)
        tree.make_node(near_win, player)
        tree.run_search(near_win, player, n_sims=1000)
        assert tree.best_play(near_win, player)[0] == c
def test_select():
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)