Code example #1
0
    def _minigui_report_search_status(self, leaves):
        """Dumps the current MCTS search state to stderr for Minigui.

        Emits an "mg-update:" line containing the root's visit and value
        statistics, the per-child Q and N arrays, the top ranked
        variations, and the live search path, in the JSON format parsed
        by one of the STDERR_HANDLERS in minigui.ts.

        Args:
          leaves: list of leaf MCTSNodes returned by tree_search().
        """
        root = self._player.get_root()

        msg = {
            "id": hex(id(root)),
            "n": int(root.N),
            "q": float(root.Q),
            "childQ": [int(round(q * 1000)) for q in root.child_Q],
            "childN": [int(n) for n in root.child_N],
        }

        variations = {}
        for child_idx in root.rank_children()[:15]:
            # Children come back in rank order, so the first unvisited or
            # unexpanded child ends the useful portion of the list.
            if root.child_N[child_idx] == 0 or child_idx not in root.children:
                break
            gtp_move = coords.to_gtp(coords.from_flat(child_idx))
            follow_ups = [
                coords.to_gtp(coords.from_flat(m.fmove))
                for m in root.children[child_idx].most_visited_path_nodes()
            ]
            variations[gtp_move] = {
                "n": int(root.child_N[child_idx]),
                "q": float(root.child_Q[child_idx]),
                "moves": [gtp_move] + follow_ups,
            }

        if leaves:
            # Walk from the first leaf back up to the root to recover the
            # path the search is currently exploring.
            live_path = []
            node = leaves[0]
            while node != root:
                live_path.append(node.fmove)
                node = node.parent
            if live_path:
                live_path.reverse()
                variations["live"] = {
                    "n": int(root.child_N[live_path[0]]),
                    "q": float(root.child_Q[live_path[0]]),
                    "moves":
                    [coords.to_gtp(coords.from_flat(m)) for m in live_path]
                }

        if variations:
            msg["variations"] = variations

        dbg("mg-update:%s" % json.dumps(msg, sort_keys=True))
Code example #2
0
    def _minigui_report_position(self):
        """Dumps the root position to stderr in Minigui's JSON format."""
        root = self._player.get_root()
        position = root.position

        # Serialize the board as one character per point, row-major:
        # "X" = black, "O" = white, "." = empty.
        board = []
        for row in range(go.N):
            for col in range(go.N):
                stone = position.board[row, col]
                if stone == go.BLACK:
                    board.append("X")
                elif stone == go.WHITE:
                    board.append("O")
                else:
                    board.append(".")

        msg = {
            "id": hex(id(root)),
            # go.BLACK instead of a magic 1, consistent with the board
            # serialization above.
            "toPlay": "B" if position.to_play == go.BLACK else "W",
            "moveNum": position.n,
            "stones": "".join(board),
            "gameOver": position.is_game_over(),
            "caps": position.caps,
        }
        # Only report the parent's stats once there is a real grandparent;
        # presumably this skips the dummy game-start node — confirm.
        if root.parent and root.parent.parent:
            msg["parentId"] = hex(id(root.parent))
            msg["q"] = float(root.parent.Q)
        if position.recent:
            msg["move"] = coords.to_gtp(position.recent[-1].move)
        dbg("mg-position:%s" % json.dumps(msg, sort_keys=True))
Code example #3
0
def print_example(examples, i):
    """Pretty-prints example i: the board side by side with its pi values.

    Args:
      examples: sequence of parsed training examples.
      i: index of the example to print.
    """
    example = examples[i]
    # Renamed from 'p': the original name was clobbered by the zip loop
    # at the bottom of this function.
    position = parse_board(example)
    print('\nExample %d of %d, %s to play, winner is %s' %
          (i + 1, len(examples), 'Black' if position.to_play == 1 else 'White',
           'Black' if example.value > 0 else 'White'))

    # n == -1 marks examples without search statistics.
    if example.n != -1:
        print(
            'N:%d  Q:%.3f  picked:%s' %
            (example.n, example.q, coords.to_gtp(coords.from_flat(example.c))))
    board_lines = str(position).split('\n')[:-2]

    # Normalization constants used when formatting the pi values.
    mean = np.mean(example.pi[example.pi > 0])
    mx = np.max(example.pi)

    pi_lines = ['PI']
    for row in range(go.N):
        pi = []
        for col in range(go.N):
            stone = position.board[row, col]
            idx = row * go.N + col
            # idx is always >= 0, so comparing it directly against
            # example.c is correct even when no move was picked (c == -1);
            # the old explicit c != -1 branch was redundant.
            picked = example.c == idx
            pi.append(format_pi(example.pi[idx], stone, mean, mx, picked))
        pi_lines.append(' '.join(pi))

    # The final pi entry is the pass move (flat index go.N * go.N).
    pi_lines.append(
        format_pi(example.pi[-1], go.EMPTY, mean, mx,
                  example.c == go.N * go.N))

    for board_line, pi_line in zip(board_lines, pi_lines):
        print('%s  |  %s' % (board_line, pi_line))
Code example #4
0
File: mcts.py  Project: shjwudp/training_results_v0.7
 def mvp_gg(self):
   """Most visited path in go-gui VAR format, e.g. 'b r3 w c17...'.

   The path is truncated at the first node whose children all have at
   most one visit.
   """
   gtp_moves = []
   for node in self.most_visited_path_nodes():
     # Stop once the search fans out to barely-visited children.
     if max(node.child_N) <= 1:
       break
     gtp_moves.append(coords.to_gtp(coords.from_flat(node.fmove)))
   return ' '.join(gtp_moves)
Code example #5
0
File: mcts.py  Project: shjwudp/training_results_v0.7
  def most_visited_path(self):
    """Returns a human-readable trace of the most visited path.

    Each step is rendered as '<gtp move> (<visit count>) ==> ', ending
    with the Q value of the final node on the path.
    """
    # 'node' deliberately survives the loop: afterwards it holds the last
    # node on the path (or self when the path is empty), whose Q is
    # appended at the end.
    node = self
    parts = []
    for node in self.most_visited_path_nodes():
      parts.append('%s (%d) ==> ' %
                   (coords.to_gtp(coords.from_flat(node.fmove)), node.N))

    parts.append('Q: {:.5f}\n'.format(node.Q))
    return ''.join(parts)
Code example #6
0
File: go.py  Project: shjwudp/training_results_v0.7
    def play_move(self, c, color=None, mutate=False):
        """Plays a stone at coordinate c, returning the resulting Position.

        Args:
          c: board coordinate of the move, or None for a pass.
          color: BLACK or WHITE; defaults to the side to play.
          mutate: if True, modify this Position in place instead of
            working on a deep copy.

        Returns:
          The Position after the move (self when mutate=True).

        Raises:
          IllegalMove: if the move is not legal in this position.
        """
        # Obeys CGOS Rules of Play. In short:
        # No suicides
        # Chinese/area scoring
        # Positional superko (this is very crudely approximate at the moment.)
        if color is None:
            color = self.to_play

        pos = self if mutate else copy.deepcopy(self)

        if c is None:
            pos = pos.pass_move(mutate=mutate)
            return pos

        if not self.is_move_legal(c):
            raise IllegalMove('{} move at {} is illegal: \n{}'.format(
                'Black' if self.to_play == BLACK else 'White',
                coords.to_gtp(c), self))

        # Must test for a ko shape *before* mutating the board.
        potential_ko = is_koish(self.board, c)

        place_stones(pos.board, color, [c])
        captured_stones = pos.lib_tracker.add_stone(color, c)
        place_stones(pos.board, EMPTY, captured_stones)

        opp_color = color * -1

        # Delta of the points changed by this move: the placed stone plus
        # any captures.
        # NOTE(review): captured points are marked with the *capturing*
        # color here, not EMPTY — presumably the delta only encodes which
        # points changed; confirm against the consumers of board_deltas.
        new_board_delta = np.zeros([N, N], dtype=np.int8)
        new_board_delta[c] = color
        place_stones(new_board_delta, color, captured_stones)

        # Capturing exactly one stone into a ko shape creates a new ko
        # point at the captured stone's location.
        if len(captured_stones) == 1 and potential_ko == opp_color:
            new_ko = list(captured_stones)[0]
        else:
            new_ko = None

        # caps is (black captures, white captures).
        if pos.to_play == BLACK:
            new_caps = (pos.caps[0] + len(captured_stones), pos.caps[1])
        else:
            new_caps = (pos.caps[0], pos.caps[1] + len(captured_stones))

        pos.n += 1
        pos.caps = new_caps
        pos.ko = new_ko
        pos.recent += (PlayerMove(color, c), )

        # keep a rolling history of last 7 deltas - that's all we'll need to
        # extract the last 8 board states.
        pos.board_deltas = np.concatenate(
            (new_board_delta.reshape(1, N, N), pos.board_deltas[:6]))
        pos.to_play *= -1
        return pos
Code example #7
0
    def cmd_genmove(self, color=None):
        """Generates and plays a move, returning it in GTP notation.

        Returns "pass" for a courtesy pass, "resign" when the player
        decides to resign, otherwise the GTP coordinate of the move
        played.
        """
        if color is not None:
            self._accomodate_out_of_turn(color)

        if self._courtesy_pass:
            # Courtesy pass: if the opponent just passed, pass back
            # regardless of score or our opinion on the game.
            recent = self._player.get_position().recent
            if recent and recent[-1].move is None:
                return "pass"

        suggested = self._player.suggest_move(self._player.get_position())
        if self._player.should_resign():
            # Resigning scores the game against the side to play.
            self._player.set_result(-1 * self._player.get_position().to_play,
                                    was_resign=True)
            return "resign"

        self._player.play_move(suggested)
        if self._player.get_root().is_done():
            self._player.set_result(self._player.get_position().result(),
                                    was_resign=False)
        return coords.to_gtp(suggested)
Code example #8
0
File: mcts.py  Project: shjwudp/training_results_v0.7
 def describe(self):
   """Returns a multi-line statistics dump for this node's top children."""
   ranked_children = self.rank_children()
   soft_n = self.child_N / max(1, sum(self.child_N))
   prior = self.child_prior
   p_delta = soft_n - prior
   # Relative change vs. the prior; zero-priors divide to zero instead
   # of raising.
   p_rel = np.divide(
       p_delta, prior, out=np.zeros_like(p_delta), where=prior != 0)
   lines = ['{q:.4f}\n'.format(q=self.Q)]
   lines.append(self.most_visited_path())
   lines.append(
       'move : action    Q     U     P   P-Dir    N  soft-N  p-delta  p-rel')
   for i in ranked_children[:15]:
     # Rank order means the first unvisited child ends the useful list.
     if self.child_N[i] == 0:
       break
     lines.append(
         '\n{!s:4} : {: .3f} {: .3f} {:.3f} {:.3f} {:.3f} {:5d} {:.4f} {: .5f} {: .2f}'
         .format(
             coords.to_gtp(coords.from_flat(i)), self.child_action_score[i],
             self.child_Q[i],
             self.child_U[i], self.child_prior[i], self.original_prior[i],
             int(self.child_N[i]), soft_n[i], p_delta[i], p_rel[i]))
   return ''.join(lines)
Code example #9
0
 def _heatmap(self, sort_order, node, prop):
     """Formats node's `prop` array as 'GTP-coord value' lines, keeping
     only the first 20 visited moves in the given sort order."""
     values = node.__dict__.get(prop)
     rows = []
     for key in sort_order:
         if node.child_N[key] > 0:
             rows.append("{!s:6} {}".format(
                 coords.to_gtp(coords.from_flat(key)), values[key]))
     return "\n".join(rows[:20])
Code example #10
0
def play(network):
    """Plays out a self-play match.

    Returns a MCTSPlayer object containing:
      - the final position
      - the n x 362 tensor of floats representing the mcts search
        probabilities
      - the n-ary tensor of floats representing the original value-net
        estimate
      where n is the number of moves in the game.

    Args:
      network: model with a run(position) -> (prob, val) method used to
        evaluate leaf positions.
    """
    readouts = FLAGS.num_readouts  # defined in strategies.py
    # Disable resign in 5% of games
    if random.random() < FLAGS.resign_disable_pct:
        resign_threshold = -1.0
    else:
        resign_threshold = None

    player = MCTSPlayer(network, resign_threshold=resign_threshold)

    player.initialize_game()

    # Must run this once at the start to expand the root node.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    # NOTE(review): first_node is passed as its own back-up target here —
    # confirm against incorporate_results' signature.
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if FLAGS.verbose >= 3:
            print(player.root.position)
            print(player.root.describe())

        # Resignation scores the game against the side to move and ends it.
        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        # Progress output: every move at verbosity >= 2, every 10th move
        # at verbosity 1.
        if (FLAGS.verbose >= 2) or (FLAGS.verbose >= 1
                                    and player.root.position.n % 10 == 9):
            print('Q: {:.5f}'.format(player.root.Q))
            dur = time.time() - start
            print('%d: %d readouts, %.3f s/100. (%.2f sec)' %
                  (player.root.position.n, readouts, dur / readouts * 100.0,
                   dur))
        if FLAGS.verbose >= 3:
            print('Played >>',
                  coords.to_gtp(coords.from_flat(player.root.fmove)))

    if FLAGS.verbose >= 2:
        utils.dbg('%s: %.3f' % (player.result_string, player.root.Q))
        utils.dbg(player.root.position, player.root.position.score())

    return player