def select_node(self, node: "Node", c) -> "Node":
    """Pick the child of ``node`` to descend into (UCT tree policy).

    Each child is scored with its ``value`` adjusted by the exploration
    term ``c * sqrt(ln(node.N()) / child.N())``.  When ``node.player`` is
    truthy the term is added and the highest score wins; otherwise the term
    is subtracted and the lowest score wins.  Ties go to the child that
    comes first in ``node.children``.

    A child whose ``N()`` is 0 is treated as having the best possible score
    for the player to move, instead of triggering a division by zero.

    Args:
        node: Node whose children are compared.  ``node.children`` is
            iterated directly, so it must yield the child nodes themselves.
        c: Exploration constant; larger values favour rarely visited
            children.

    Returns:
        The selected child node.

    Raises:
        ValueError: If ``node`` has no children, or if ``math.log`` /
            ``math.sqrt`` reject the visit counts (e.g. ``node.N()`` is 0).
    """
    children = list(node.children)
    if not children:
        raise ValueError("cannot select a child: node has no children")

    # The parent's visit count is the same for every child, so take the
    # logarithm once rather than once per child.
    log_parent_visits = math.log(node.N())
    maximizing = bool(node.player)

    def score(child):
        visits = child.N()
        if visits == 0:
            # Return the extreme score directly rather than computing
            # c * inf, which would be NaN when c == 0.
            return math.inf if maximizing else -math.inf
        exploration = c * math.sqrt(log_parent_visits / visits)
        if maximizing:
            return child.value + exploration
        return child.value - exploration

    # max()/min() return the first best element, matching the first-index
    # tie-breaking of argmax/argmin.
    if maximizing:
        return max(children, key=score)
    return min(children, key=score)
def test_UCT(self):
    # Scaffold for a UCT test: builds a root node with a single child stored
    # under the key "b", assigns N on both nodes by hand, then fails
    # unconditionally because the assertions have not been written yet.
    game = Tiger()

    root = Node(game)
    # NOTE(review): N is assigned as a plain attribute here, while
    # select_node calls node.N() -- confirm which form Node expects.
    root.N = 10

    child_key = "b"
    root.children[child_key] = Node(game)
    child = root.children[child_key]
    child.N = 20

    self.fail("finish the test")