def expand(n):
    """Give ``n`` its child nodes if it has none, then pick a node beneath it.

    A node that already has children, or whose state is terminal according
    to ``game.terminal_test``, is left untouched. Otherwise one ``MCT_Node``
    is created per action in ``game.actions(n.state)`` and stored in
    ``n.children`` as a ``{child_node: action}`` mapping.

    NOTE(review): ``game``, ``select`` and ``MCT_Node`` are not parameters;
    they are looked up in an enclosing/module scope that is not visible here.

    Returns:
        Whatever ``select(n)`` returns after the (possible) expansion.
    """
    # Nothing to add: already expanded, or the game is over at this node.
    if n.children or game.terminal_test(n.state):
        return select(n)

    successors = {}
    for move in game.actions(n.state):
        successor = MCT_Node(state=game.result(n.state, move), parent=n)
        successors[successor] = move
    n.children = successors

    return select(n)
def monte_carlo_tree_search(state, game, N=1000):
    """Pick an action for ``state`` using ``N`` rounds of Monte Carlo tree search.

    Every round selects a leaf by repeatedly following the child with the
    highest ``ucb`` score, expands that leaf, plays a uniformly random game
    from the chosen node, and propagates the outcome back up to the root.

    Args:
        state: Game state to search from.
        game: Object providing ``actions``, ``result``, ``terminal_test``,
            ``to_move`` and ``utility``.
        N: Number of select/expand/simulate/backprop rounds to run.

    Returns:
        The action leading to the most-visited child of the root, or ``None``
        when the root never gained any children (``state`` is terminal, has
        no legal actions, or ``N`` is not positive).
    """

    def select(n):
        """Descend from ``n`` along best-``ucb`` children until a leaf is reached."""
        # Iterative on purpose: the tree can grow one level per round, so a
        # recursive descent may exceed the interpreter's recursion limit.
        while n.children:
            n = max(n.children, key=ucb)
        return n

    def expand(n):
        """Add all child states to a non-terminal leaf, then select below it."""
        if not n.children and not game.terminal_test(n.state):
            n.children = {
                MCT_Node(state=game.result(n.state, action), parent=n): action
                for action in game.actions(n.state)
            }
        return select(n)

    def simulate(game, state):
        """Play random moves from ``state`` until the game ends.

        Returns the negated utility of the final state as seen by the player
        who was to move at the starting ``state``.
        """
        player = game.to_move(state)
        while not game.terminal_test(state):
            action = random.choice(list(game.actions(state)))
            state = game.result(state, action)
        return -game.utility(state, player)

    def backprop(n, utility):
        """Update visit counts and accumulated utility from ``n`` up to the root."""
        # Iterative for the same reason as ``select``.
        while True:
            # Only positive outcomes are credited to a node's U.
            if utility > 0:
                n.U += utility
            n.N += 1
            if not n.parent:
                return
            # The sign flips at every level on the way up.
            n, utility = n.parent, -utility

    root = MCT_Node(state=state)

    for _ in range(N):
        leaf = select(root)
        child = expand(leaf)
        result = simulate(game, child.state)
        backprop(child, result)

    # Without children there is no move to recommend; avoid max() on an
    # empty mapping, which would raise an uninformative ValueError.
    if not root.children:
        return None

    most_visited = max(root.children, key=lambda p: p.N)
    return root.children[most_visited]