def playMCTSgame(): ttt = tictactoe.TicTacToeGameState() root = mcts.MCTSNode(ttt) node = root while ttt.winner is None: move = mcts.mcts(node, 10000) ttt.executeMove(move) node = mcts.MCTSNode(ttt) print ttt
def playMCTSgame(): cf = connectfour.ConnectFourGameState() root = mcts.MCTSNode(cf) node = root while cf.winner is None: move = mcts.mcts(node, 20000) cf.executeMove(move) node = mcts.MCTSNode(cf) print cf print "Move:", move print "Winner is player", cf.winner
def test_action_flipping(self): np.random.seed(1) probs = np.array([.02] * (go.N * go.N + 1)) probs = probs + np.random.random([go.N * go.N + 1]) * 0.001 black_root = mcts.MCTSNode(go.Position()) white_root = mcts.MCTSNode(go.Position(to_play=go.WHITE)) black_root.select_leaf().incorporate_results(probs, 0, black_root) white_root.select_leaf().incorporate_results(probs, 0, white_root) # No matter who is to play, when we know nothing else, the priors # should be respected, and the same move should be picked black_leaf = black_root.select_leaf() white_leaf = white_root.select_leaf() self.assertEqual(black_leaf.fmove, white_leaf.fmove) self.assertEqualNPArray(black_root.child_action_score, white_root.child_action_score)
def test_add_child_idempotency(self): root = mcts.MCTSNode(go.Position()) child = root.maybe_add_child(17) current_children = copy.copy(root.children) child2 = root.maybe_add_child(17) self.assertEqual(child, child2) self.assertEqual(current_children, root.children)
def initialize_game(self, position=None): if position is None: position = go.Position() self.root = mcts.MCTSNode(position) self.result = 0 self.result_string = None self.comments = [] self.searches_pi = []
def test_select_leaf(self): flattened = coords.to_flat(coords.from_kgs('D9')) probs = np.array([.02] * (go.N * go.N + 1)) probs[flattened] = 0.4 root = mcts.MCTSNode(SEND_TWO_RETURN_ONE) root.select_leaf().incorporate_results(probs, 0, root) self.assertEqual(root.position.to_play, go.WHITE) self.assertEqual(root.select_leaf(), root.children[flattened])
def playMCTSgame(): q = quoridor.QuoridorGameState() root = mcts.MCTSNode(q) node = root while q.winner is None: start = time.clock() move = mcts.mcts(node, 10000) end = time.clock() print "Move time: ", str(end - start) q.executeMove(move) node = mcts.MCTSNode(q) print "Player", str(3 % q.currentPlayer), "Move:", move print q print "Winner is player", q.winner
def test_do_not_explore_past_finish(self): probs = np.array([0.02] * (go.N * go.N + 1), dtype=np.float32) root = mcts.MCTSNode(go.Position()) root.select_leaf().incorporate_results(probs, 0, root) first_pass = root.maybe_add_child(coords.to_flat(None)) first_pass.incorporate_results(probs, 0, root) second_pass = first_pass.maybe_add_child(coords.to_flat(None)) with self.assertRaises(AssertionError): second_pass.incorporate_results(probs, 0, root) node_to_explore = second_pass.select_leaf() # should just stop exploring at the end position. self.assertEqual(second_pass, node_to_explore)
def test_normalize_policy(self): # sum of probs > 1.0 probs = np.array([2.0] * (go.N * go.N + 1)) root = mcts.MCTSNode(TEST_POSITION) root.incorporate_results(probs, 0, root) root.N = 0 # Policy sums to 1.0, only legal moves have non-zero values. self.assertAlmostEqual(1.0, sum(root.child_prior)) self.assertEqual(6, np.count_nonzero(root.child_prior)) self.assertEqual(0, sum(root.child_prior * root.illegal_moves))
def initialize_game(self, board=None): if board is None: board = hex.Hex() self.board = board self.root = mcts.MCTSNode(board, cpuct=self.cpuct) self.result = 0 self.result_string = None self.comments = [] self.searches_pi = [] self.qs = [] first_node = self.root.select_leaf() prob, val = self.net.run(first_node.board) first_node.incorporate_results(prob, val, first_node)
def test_dont_pick_unexpanded_child(self): probs = np.array([0.001] * (go.N * go.N + 1)) # make one move really likely so that tree search goes down that path twice # even with a virtual loss probs[17] = 0.999 root = mcts.MCTSNode(go.Position()) root.incorporate_results(probs, 0, root) leaf1 = root.select_leaf() self.assertEqual(17, leaf1.fmove) leaf1.add_virtual_loss(up_to=root) # the second select_leaf pick should return the same thing, since the child # hasn't yet been sent to neural net for eval + result incorporation leaf2 = root.select_leaf() self.assertIs(leaf1, leaf2)
def test_upper_bound_confidence(self): probs = np.array([.02] * (go.N * go.N + 1)) root = mcts.MCTSNode(go.Position()) leaf = root.select_leaf() self.assertEqual(root, leaf) leaf.incorporate_results(probs, 0.5, root) # 0.02 are normalized to 1/82 self.assertAlmostEqual(root.child_prior[0], 1 / 82) self.assertAlmostEqual(root.child_prior[1], 1 / 82) puct_policy = lambda n: 2.0 * (math.log( (1.0 + n + FLAGS.c_puct_base) / FLAGS.c_puct_base) + FLAGS. c_puct_init) * 1 / 82 self.assertEqual(root.N, 1) self.assertAlmostEqual(root.child_U[0], puct_policy(root.N) * math.sqrt(1) / (1 + 0)) leaf = root.select_leaf() self.assertNotEqual(root, leaf) # With the first child expanded. self.assertEqual(root.N, 1) self.assertAlmostEqual(root.child_U[0], puct_policy(root.N) * math.sqrt(1) / (1 + 0)) self.assertAlmostEqual(root.child_U[1], puct_policy(root.N) * math.sqrt(1) / (1 + 0)) leaf.add_virtual_loss(up_to=root) leaf2 = root.select_leaf() self.assertNotIn(leaf2, (root, leaf)) leaf.revert_virtual_loss(up_to=root) leaf.incorporate_results(probs, 0.3, root) leaf2.incorporate_results(probs, 0.3, root) # With the 2nd child expanded. self.assertEqual(root.N, 3) self.assertAlmostEqual(root.child_U[0], puct_policy(root.N) * math.sqrt(2) / (1 + 1)) self.assertAlmostEqual(root.child_U[1], puct_policy(root.N) * math.sqrt(2) / (1 + 1)) self.assertAlmostEqual(root.child_U[2], puct_policy(root.N) * math.sqrt(2) / (1 + 0))
def test_backup_incorporate_results(self): probs = np.array([.02] * (go.N * go.N + 1)) root = mcts.MCTSNode(SEND_TWO_RETURN_ONE) root.select_leaf().incorporate_results(probs, torch.tensor(0), root) leaf = root.select_leaf() leaf.incorporate_results(probs, torch.tensor(-1), root) # white wins! # Root was visited twice: first at the root, then at this child. self.assertEqual(root.N, 2) # Root has 0 as a prior and two visits with value 0, -1 self.assertAlmostEqual(-1 / 3, root.Q) # average of 0, 0, -1 # Leaf should have one visit self.assertEqual(1, root.child_N[leaf.fmove]) self.assertEqual(1, leaf.N) # And that leaf's value had its parent's Q (0) as a prior, so the Q # should now be the average of 0, -1 self.assertAlmostEqual(-0.5, root.child_Q[leaf.fmove]) self.assertAlmostEqual(-0.5, leaf.Q) # We're assuming that select_leaf() returns a leaf like: # root # \ # leaf # \ # leaf2 # which happens in this test because root is W to play and leaf was a W win. self.assertEqual(go.WHITE, root.position.to_play) leaf2 = root.select_leaf() leaf2.incorporate_results(probs, torch.tensor(-0.2, dtype=torch.float32), root) # another white semi-win self.assertEqual(3, root.N) # average of 0, 0, -1, -0.2 self.assertAlmostEqual(-0.3, root.Q) self.assertEqual(2, leaf.N) self.assertEqual(1, leaf2.N) # average of 0, -1, -0.2 self.assertAlmostEqual(root.child_Q[leaf.fmove], leaf.Q) self.assertAlmostEqual(-0.4, leaf.Q) # average of -1, -0.2 self.assertAlmostEqual(-0.6, leaf.child_Q[leaf2.fmove]) self.assertAlmostEqual(-0.6, leaf2.Q)
def test_inject_noise_only_legal_moves(self): probs = np.array([0.02] * (go.N * go.N + 1)) root = mcts.MCTSNode(TEST_POSITION) root.incorporate_results(probs, 0, root) root.N = 0 uniform_policy = 1 / sum(root.illegal_moves == 0) expected_policy = uniform_policy * (1 - root.illegal_moves) self.assertTrue((root.child_prior == expected_policy).all()) root.inject_noise() # 0.75/0.25 derived from default dirichlet_noise_weight. self.assertTrue((0.75 * expected_policy <= root.child_prior).all()) self.assertTrue( (0.75 * expected_policy + 0.25 >= root.child_prior).all()) # Policy sums to 1.0, only legal moves have non-zero values. self.assertAlmostEqual(1.0, sum(root.child_prior)) self.assertEqual(0, sum(root.child_prior * root.illegal_moves))
def test_never_select_illegal_moves(self): probs = np.array([0.02] * (go.N * go.N + 1)) # let's say the NN were to accidentally put a high weight on an illegal move probs[1] = 0.99 root = mcts.MCTSNode(SEND_TWO_RETURN_ONE) root.incorporate_results(probs, 0, root) # and let's say the root were visited a lot of times, which pumps up the # action score for unvisited moves... root.N = 100000 root.child_N[root.position.all_legal_moves()] = 10000 # this should not throw an error... leaf = root.select_leaf() # the returned leaf should not be the illegal move self.assertNotEqual(1, leaf.fmove) # and even after injecting noise, we should still not select an illegal move for i in range(10): root.inject_noise() leaf = root.select_leaf() self.assertNotEqual(1, leaf.fmove)
def playAgainstMCTS(): ttt = tictactoe.TicTacToeGameState() print ttt while True: move = int(input("Make your move: ")) ttt.executeMove(move) if ttt.winner is not None: break node = mcts.MCTSNode(ttt) computerMove = mcts.mcts(node, 10000) ttt.executeMove(computerMove) print ttt if ttt.winner is not None: break print ttt print "Player", ttt.winner, "wins"
def playAgainstMCTS(): cf = connectfour.ConnectFourGameState() print cf while True: move = int(input("Make your move: ")) cf.executeMove(move) if cf.winner is not None: break node = mcts.MCTSNode(cf) computerMove = mcts.mcts(node, 10000) cf.executeMove(computerMove) print cf if cf.winner is not None: break print cf print "Player", cf.winner, "wins"
def initialize_game(self, status=None): if status == None: status = go.GoStatus() self.root = mcts.MCTSNode(status) self.result = 0
import mcts mtcss = mcts.MCTSNode(None, action_size=3) print(mtcss)
def test_add_child(self): root = mcts.MCTSNode(go.Position()) child = root.maybe_add_child(17) self.assertIn(17, root.children) self.assertEqual(root, child.parent) self.assertEqual(17, child.fmove)
def initialize_game(self, position=None): if position is None: position = deepxor.Position() self.root = mcts.MCTSNode(position)