Beispiel #1
0
 def select_node(self, node: Node, c) -> Node:  # Tree policy
     """Return the child of ``node`` with the best UCB1-style score.

     Each visited child is scored as
     ``child.value +/- c * sqrt(ln(node.N()) / child.N())``.
     When ``node.player`` is truthy the bonus is added and the highest
     score wins; otherwise the bonus is subtracted and the lowest score
     wins.

     A child with zero visits is always preferred over visited ones
     (first such child in ``node.children`` order), instead of raising
     ``ZeroDivisionError``.

     Args:
         node: Node whose ``children`` sequence is searched. It must
             provide ``player``, ``N()`` and integer-indexable
             ``children``.
         c: Exploration weight multiplying the square-root term.

     Returns:
         The selected element of ``node.children``.

     Raises:
         ValueError: If ``node`` has no children.
     """
     children = node.children
     if len(children) == 0:
         raise ValueError("select_node requires a node with at least one child")

     maximizing = bool(node.player)

     # The parent's visit count is the same for every child, so take the
     # logarithm once. log(0) is undefined; a parent without recorded
     # visits simply contributes no exploration term.
     parent_visits = node.N()
     log_parent = math.log(parent_visits) if parent_visits > 0 else 0.0

     # An unvisited child gets the most attractive score for the side
     # that is choosing, so it is tried before any visited sibling.
     unvisited_score = math.inf if maximizing else -math.inf

     scores = []
     for child in children:
         visits = child.N()
         if visits == 0:
             scores.append(unvisited_score)
             continue
         exploration = c * (log_parent / visits) ** 0.5
         if maximizing:
             scores.append(child.value + exploration)
         else:
             scores.append(child.value - exploration)

     # argmax/argmin both return the first index on ties.
     best_child = np.argmax(scores) if maximizing else np.argmin(scores)
     return children[best_child]
Beispiel #2
0
    def test_UCT(self):
        """Unfinished UCT test scaffold.

        Builds a parent node with a single child stored under key "b",
        assigns ``N`` on both, and then fails unconditionally because no
        assertions have been written yet.
        """
        problem = Tiger()

        # Parent node with N set to 10.
        root = Node(problem)
        root.N = 10

        # Child registered under "b", then looked up again to set N to 20.
        root.children["b"] = Node(problem)
        root.children["b"].N = 20

        # Deliberate failure marking the test as incomplete.
        self.fail("finish the test")