Ejemplo n.º 1
0
    def good_print(self, board, node, color, num_nodes):
        """
        Trace one descent of the in-tree selection policy on stderr.

        Starting at ``node``, each visited node is reported (its move, child
        count and visit count), then up to ``num_nodes`` of its children are
        listed in descending order of UCT value. The child returned by
        ``node.select`` is followed and its move is played on a private copy
        of ``board``. When a leaf is reached, a rollout is evaluated from the
        copied position and its result is printed.

        Arguments:
        board -- position matching ``node``; all moves are played on a copy.
        node -- tree node at which the trace starts.
        color -- color to play at ``node``.
        num_nodes -- maximum number of children to list per visited node.

        Returns:
        None
        """
        cboard = board.copy()
        sys.stderr.write("\nTaking a tour of selection policy in tree! \n\n")
        sys.stderr.write(cboard.get_twoD_board())
        sys.stderr.flush()
        while not node.is_leaf():
            # Label the node by its *own* move. Reading the loop-local `move`
            # here would raise UnboundLocalError when the trace starts at a
            # node that already carries a move.
            if node._move is None:
                pointString = 'Root'
            elif node._move != PASS:
                pointString = board.point_to_string(node._move)
            else:
                pointString = node._move
            sys.stderr.write("\nMove: {} Numebr of children {}, Number of visits: {}\n"
                .format(pointString,len(node._children),node._n_visits))
            sys.stderr.flush()

            # The side to move does not change within one iteration, so the
            # flag is computed once and reused for ranking and selection.
            max_flag = color == BLACK
            moves_ls = [
                (move, uct_val(node, child, self.exploration, max_flag), child)
                for move, child in node._children.items()
            ]
            moves_ls.sort(key=lambda entry: entry[1], reverse=True)

            if moves_ls:
                sys.stderr.write("\nPrinting {} of {} childs that have highest UCT value \n\n".format(num_nodes, pointString))
                sys.stderr.flush()
                # Slice rather than index so that a node with fewer than
                # num_nodes children does not raise IndexError.
                for move, child_val, child_node in moves_ls[:max(num_nodes, 0)]:
                    if move !=PASS:
                        sys.stderr.write("\nChild point:{} ;UCT Value {}; Number of visits: {}; Number of Black wins: {}\n"
                            .format(cboard.point_to_string(move), child_val, child_node._n_visits, child_node._black_wins))
                        sys.stderr.flush()
                    else:
                        sys.stderr.write("\nChild point:{} ;UCT Value {}; Number of visits: {}; Number of Black wins: {} \n"
                            .format(move, child_val, child_node._n_visits, child_node._black_wins))
                        sys.stderr.flush()
            # Greedily select next move.
            move, next_node = node.select(self.exploration,max_flag)
            if move==PASS:
                # The board methods below are handed None for a pass.
                move = None
            assert cboard.check_legal(move, color)
            pointString = cboard.point_to_string(move)
            cboard.move(move, color)
            sys.stderr.write("\nBoard in simulation after chosing child {} in tree. \n".format(pointString))
            sys.stderr.write(cboard.get_twoD_board())
            sys.stderr.flush()
            color = GoBoardUtilGo4.opponent(color)
            node = next_node
        assert node.is_leaf()
        cboard.current_player = color
        leaf_value = self._evaluate_rollout(cboard, color)
        sys.stderr.write("\nWinner of simulation is: {} color, Black is 0 an \n".format(leaf_value))
        sys.stderr.flush()
Ejemplo n.º 2
0
 def update_with_move(self, last_move):
     """
     Advance the search tree by one move.

     If ``last_move`` is among the current root's children, that child
     becomes the new root, so its subtree is kept. Otherwise the tree is
     restarted from a fresh ``TreeNode(None)``. The new root is detached
     from its parent and the side to play is switched to the opponent.
     """
     children = self._root._children
     new_root = children[last_move] if last_move in children else TreeNode(None)
     self._root = new_root
     # Drop the link to the old root so the new root has no parent.
     self._root._parent = None
     self.toplay = GoBoardUtilGo4.opponent(self.toplay)
Ejemplo n.º 3
0
 def simulate(self, board, cboard, move, toplay):
     """
     Play ``move`` on a scratch board and finish the game by simulation.

     ``board`` is copied into ``cboard`` with ``GoBoardUtilGo4.copyb2b``,
     ``move`` is played there for ``toplay``, and the rest of the game is
     played out by ``GoBoardUtilGo4.playGame`` with the opponent to move.

     Arguments:
     board -- source position; copied into ``cboard``.
     cboard -- scratch board that receives the copy and is played on.
     move -- move to play first on the scratch board.
     toplay -- color that plays ``move``.

     Returns:
     Whatever ``GoBoardUtilGo4.playGame`` returns.
     """
     GoBoardUtilGo4.copyb2b(board, cboard)
     # Element-wise check that the copy took effect. The earlier form,
     # `cboard.board.all() == board.board.all()`, only compared two reduced
     # booleans and therefore passed for almost any pair of boards.
     # Assumes `.board` is a NumPy-style array (it exposes `.all()`).
     assert (cboard.board == board.board).all()
     cboard.move(move, toplay)
     opp = GoBoardUtilGo4.opponent(toplay)
     # NOTE(review): `simulations` is neither a parameter nor an attribute
     # here; presumably a module-level name - confirm it is defined.
     return GoBoardUtilGo4.playGame(cboard,
                                    opp,
                                    komi=self.komi,
                                    limit=self.limit,
                                    simulation_policy=simulations,
                                    use_pattern=self.use_pattern,
                                    check_selfatari=self.check_selfatari)
Ejemplo n.º 4
0
    def _playout(self, board, color):
        """
        Perform one tree descent, a rollout at the leaf, and back-propagation.

        The walk starts at the root and follows ``select`` until a leaf is
        reached. The leaf is expanded if it has not been yet, evaluated with
        ``_evaluate_rollout``, and the result is pushed back through
        ``update_recursive``. The board is played on directly, so callers
        should pass a copy.

        Arguments:
        board -- a copy of the board.
        color -- color to play

        Returns:
        None
        """
        current = self._root
        current._use_knowledge = self.in_tree_knowledge
        # Expected to trigger only while the root is still unexpanded.
        if not current._expanded:
            current.expand(board, color)

            # Seed the root's totals from its children to avoid division by
            # zero errors.
            if self.in_tree_knowledge == "probabilistic":
                kids = list(current._children.values())
                current._black_wins = sum(kid._black_wins for kid in kids)
                current._n_visits = sum(kid._n_visits for kid in kids)

        while not current.is_leaf():
            # Greedily select next move.
            maximize = (color == BLACK)
            move, chosen = current.select(self.exploration, maximize)
            if move == PASS:
                # board.move is handed None for a pass.
                move = None
            else:
                assert board.check_legal(move, color)
            board.move(move, color)
            color = GoBoardUtilGo4.opponent(color)
            current = chosen

        assert current.is_leaf()
        if not current._expanded:
            current.expand(board, color)

        assert board.current_player == color
        # Update value and visit count of nodes in this traversal.
        current.update_recursive(self._evaluate_rollout(board, color))
Ejemplo n.º 5
0
    def _playout(self, board, color):
        """
        Perform one tree descent, a rollout at the leaf, and back-propagation.

        Beginning at the root, ``select`` is followed until a leaf is
        reached. The leaf is expanded if it has not been yet, evaluated with
        ``_evaluate_rollout``, and the result is handed to
        ``update_recursive``. Moves are played directly on ``board``, so
        callers should pass a copy.

        Arguments:
        board -- a copy of the board.
        color -- color to play

        Returns:
        None
        """
        current = self._root
        # Expected to trigger only while the root is still unexpanded.
        if not current._expanded:
            current.expand(board, color)

        while not current.is_leaf():
            # Greedily select next move.
            maximize = (color == BLACK)
            move, chosen = current.select(self.exploration, maximize)
            if move == PASS:
                # board.move is handed None for a pass.
                move = None
            else:
                assert board.check_legal(move, color)
            board.move(move, color)
            color = GoBoardUtilGo4.opponent(color)
            current = chosen

        assert current.is_leaf()
        if not current._expanded:
            current.expand(board, color)

        assert board.current_player == color
        # Update value and visit count of nodes in this traversal.
        current.update_recursive(self._evaluate_rollout(board, color))