Beispiel #1
0
    def _minigui_report_search_status(self, leaves):
        """Prints the current MCTS search status to stderr.

        Reports the root node's N and Q, its per-child Q (scaled by 1000 and
        rounded to an int) and per-child N, the most visited path below each
        of the top-ranked children, and the live search path to the first
        leaf, in a format that can be parsed by one of the STDERR_HANDLERS in
        minigui.ts.

        Args:
          leaves: list of leaf MCTSNodes returned by tree_search().
        """
        root = self._player.get_root()

        msg = {
            "id": hex(id(root)),
            "n": int(root.N),
            "q": float(root.Q),
            "childQ": [int(round(q * 1000)) for q in root.child_Q],
            "childN": [int(n) for n in root.child_N],
        }

        variations = {}
        for i in root.rank_children()[:15]:
            # Stop at the first ranked child that was never visited or has no
            # expanded node yet.
            if root.child_N[i] == 0 or i not in root.children:
                break
            c = coords.to_gtp(coords.from_flat(i))
            nodes = root.children[i].most_visited_path_nodes()
            moves = [coords.to_gtp(coords.from_flat(m.fmove)) for m in nodes]
            variations[c] = {
                "n": int(root.child_N[i]),
                "q": float(root.child_Q[i]),
                "moves": [c] + moves,
            }

        if leaves:
            # Walk from the first leaf up to the root, collecting the flat
            # move played at each node along the way.
            path = []
            leaf = leaves[0]
            while leaf != root:
                path.append(leaf.fmove)
                leaf = leaf.parent
            if path:
                # The moves were collected leaf-first; report them root-first.
                path.reverse()
                variations["live"] = {
                    "n": int(root.child_N[path[0]]),
                    "q": float(root.child_Q[path[0]]),
                    "moves":
                    [coords.to_gtp(coords.from_flat(m)) for m in path]
                }

        if variations:
            msg["variations"] = variations

        dbg("mg-update:%s" % json.dumps(msg, sort_keys=True))
Beispiel #2
0
def main(argv):
    """Replays the SGF file named by argv[1], printing every position.

    The board size is read from the SGF's SZ property (19 is assumed when no
    SZ property is found) and exported through the BOARD_SIZE environment
    variable before the Minigo libraries are imported.

    Args:
      argv: command-line arguments; argv[1] is the SGF path. Paths starting
        with 'gs://' are read through tensorflow's gfile.
    """
    path = argv[1]
    # It takes a couple of seconds to import anything from tensorflow, so only
    # do it if we need to read from GCS.
    if path.startswith('gs://'):
        from tensorflow import gfile
        opener = gfile.GFile
    else:
        opener = open
    # The context manager closes the file even when read() raises.
    with opener(path, 'r') as f:
        contents = f.read()

    # Determine the board size before importing any Minigo libraries because
    # they require that the BOARD_SIZE environment variable is set correctly
    # before import.
    m = re.search(r'SZ\[([^]]+)', contents)
    if not m:
        print('Couldn\'t find SZ node, assuming 19x19 board')
        board_size = 19
    else:
        board_size = int(m.group(1))

    # Set the board size and import the Minigo libs.
    os.environ['BOARD_SIZE'] = str(board_size)
    import coords
    import go
    import sgf_wrapper

    # Replay the game.
    for x in sgf_wrapper.replay_sgf(contents):
        to_play = 'B' if x.position.to_play == 1 else 'W'
        print('{}>> {}: {}\n'.format(x.position, to_play,
                                     coords.to_gtp(x.next_move)))
Beispiel #3
0
def print_example(examples, i):
    """Prints example `i` as a board diagram next to its policy grid.

    A header line reports the example number, the side to play and the winner
    (Black when example.value > 0). When example.n is not -1, a second line
    reports N, Q and the picked move. Each board line is then printed beside
    the matching line of formatted pi values.

    Args:
      examples: indexable collection of examples accepted by parse_board().
      i: index of the example to print.
    """
    example = examples[i]
    position = parse_board(example)
    to_play_name = 'Black' if position.to_play == 1 else 'White'
    winner_name = 'Black' if example.value > 0 else 'White'
    print('\nExample %d of %d, %s to play, winner is %s' %
          (i + 1, len(examples), to_play_name, winner_name))

    if example.n != -1:
        picked_move = coords.to_gtp(coords.from_flat(example.c))
        print('N:%d  Q:%.3f  picked:%s' % (example.n, example.q, picked_move))

    # The last two lines of the position's string form are dropped.
    board_lines = str(position).split('\n')[:-2]

    mean = np.mean(example.pi[example.pi > 0])
    mx = np.max(example.pi)

    # example.c == -1 means that no move is marked as picked.
    has_pick = example.c != -1
    pi_lines = ['PI']
    for row in range(go.N):
        cells = []
        for col in range(go.N):
            idx = row * go.N + col
            picked = (example.c == idx) if has_pick else False
            cells.append(format_pi(example.pi[idx], position.board[row, col],
                                   mean, mx, picked))
        pi_lines.append(' '.join(cells))

    # The final pi entry (flat index go.N * go.N) gets a line of its own.
    pi_lines.append(
        format_pi(example.pi[-1], go.EMPTY, mean, mx,
                  example.c == go.N * go.N))

    for board_line, pi_line in zip(board_lines, pi_lines):
        print('%s  |  %s' % (board_line, pi_line))
Beispiel #4
0
    def play_move(self, c, color=None, mutate=False):
        """Plays `c` for `color` and returns the resulting position.

        Obeys CGOS Rules of Play. In short:
          - No suicides
          - Chinese/area scoring
          - Positional superko (this is very crudely approximate at the
            moment.)

        Args:
          c: the move to play. None is a pass and is delegated to
            pass_move(); any other value is used to index an N x N array.
          color: color of the stone to place; defaults to self.to_play.
          mutate: when True this position is updated in place, otherwise the
            update is applied to a deep copy.

        Returns:
          The updated position (self when mutate is True).

        Raises:
          IllegalMove: if self.is_move_legal(c) is false.
        """
        stone_color = self.to_play if color is None else color
        pos = self if mutate else copy.deepcopy(self)

        if c is None:
            return pos.pass_move(mutate=mutate)

        if not self.is_move_legal(c):
            mover_name = "Black" if self.to_play == BLACK else "White"
            raise IllegalMove("{} move at {} is illegal: \n{}".format(
                mover_name, coords.to_gtp(c), self))

        # Must be evaluated before the stone is placed: with mutate=True,
        # pos.board and self.board are the same array.
        potential_ko = is_koish(self.board, c)

        place_stones(pos.board, stone_color, [c])
        captured = pos.lib_tracker.add_stone(stone_color, c)
        place_stones(pos.board, EMPTY, captured)

        # Board delta for this move: the played point and every captured
        # point are marked with the mover's color.
        board_delta = np.zeros([N, N], dtype=np.int8)
        board_delta[c] = stone_color
        place_stones(board_delta, stone_color, captured)

        capture_count = len(captured)
        is_ko = capture_count == 1 and potential_ko == stone_color * -1

        pos.n += 1
        if pos.to_play == BLACK:
            pos.caps = (pos.caps[0] + capture_count, pos.caps[1])
        else:
            pos.caps = (pos.caps[0], pos.caps[1] + capture_count)
        pos.ko = list(captured)[0] if is_ko else None
        pos.recent += (PlayerMove(stone_color, c), )

        # keep a rolling history of last 7 deltas - that's all we'll need to
        # extract the last 8 board states.
        pos.board_deltas = np.concatenate(
            (board_delta.reshape(1, N, N), pos.board_deltas[:6]))
        pos.to_play *= -1

        return pos
Beispiel #5
0
 def describe(self):
     """Returns a text summary of this node's search statistics.

     The summary holds the node's Q, the most visited path, a column header
     and one row for each of the first 15 children in rank_children() order,
     stopping early at the first child that has no visits.
     """
     ranking = self.rank_children()
     visit_share = self.child_N / max(1, sum(self.child_N))
     priors = self.child_prior
     prior_gap = visit_share - priors
     # Entries whose prior is 0 keep the 0 they were initialised with.
     relative_gap = np.divide(prior_gap, priors,
                              out=np.zeros_like(prior_gap),
                              where=priors != 0)

     row_template = "\n{!s:4} : {: .3f} {: .3f} {:.3f} {:.3f} {:.3f} {:5d} {:.4f} {: .5f} {: .2f}"
     pieces = [
         "{q:.4f}\n".format(q=self.Q),
         self.most_visited_path(),
         "move : action    Q     U     P   P-Dir    N  soft-N  p-delta  p-rel",
     ]
     for move in ranking[:15]:
         if self.child_N[move] == 0:
             break
         pieces.append(row_template.format(
             coords.to_gtp(coords.from_flat(move)),
             self.child_action_score[move],
             self.child_Q[move],
             self.child_U[move],
             self.child_prior[move],
             self.original_prior[move],
             int(self.child_N[move]),
             visit_share[move],
             prior_gap[move],
             relative_gap[move]))
     return ''.join(pieces)
Beispiel #6
0
 def describe(self):
     """Returns a text summary of this node's search statistics.

     Children are ordered by (visit count, action score), highest first. The
     summary holds the node's Q, the most visited path, a column header and
     one row for each of the first 15 children in that order, stopping early
     at the first child that has no visits.
     """
     by_visits = sorted(
         range(go.N * go.N + 1),
         key=lambda i: (self.child_N[i], self.child_action_score[i]),
         reverse=True)
     visit_share = self.child_N / max(1, sum(self.child_N))
     priors = self.child_prior
     prior_gap = visit_share - priors
     # Entries whose prior is 0 keep the 0 they were initialised with.
     relative_gap = np.divide(prior_gap, priors,
                              out=np.zeros_like(prior_gap),
                              where=priors != 0)

     row_template = "\n{!s:4} : {: .3f} {: .3f} {:.3f} {:.3f} {:.3f} {:5d} {:.4f} {: .5f} {: .2f}"
     # NOTE(review): '.4' applied to a str keeps only its first four
     # characters; confirm that this truncation of Q is intended.
     pieces = [
         "{q:.4}\n".format(q=str(self.Q)),
         self.most_visited_path(),
         "move : action    Q     U     P   P-Dir    N  soft-N  p-delta  p-rel",
     ]
     for move in by_visits[:15]:
         if self.child_N[move] == 0:
             break
         pieces.append(row_template.format(
             coords.to_gtp(coords.from_flat(move)),
             self.child_action_score[move],
             self.child_Q[move],
             self.child_U[move],
             self.child_prior[move],
             self.original_prior[move],
             int(self.child_N[move]),
             visit_share[move],
             prior_gap[move],
             relative_gap[move]))
     return ''.join(pieces)
Beispiel #7
0
    def _minigui_report_position(self):
        """Writes the root position as an "mg-position:" JSON line via dbg().

        The message carries the root's id, side to play, move number, the
        board flattened row by row into a string of "X" (black), "O" (white)
        and "." (anything else), the game-over flag and the capture counts.
        The parent's id and Q are added when the root has a grandparent, and
        the last move is added when the position has any recent moves.
        """
        root = self._player.get_root()
        position = root.position

        def glyph(stone):
            # One character per board point.
            if stone == go.BLACK:
                return "X"
            if stone == go.WHITE:
                return "O"
            return "."

        stones = "".join(
            glyph(position.board[row, col])
            for row in range(go.N)
            for col in range(go.N))

        msg = {
            "id": hex(id(root)),
            "toPlay": "B" if position.to_play == 1 else "W",
            "moveNum": position.n,
            "stones": stones,
            "gameOver": position.is_game_over(),
            "caps": position.caps,
        }
        if root.parent and root.parent.parent:
            msg.update({
                "parentId": hex(id(root.parent)),
                "q": float(root.parent.Q),
            })
        if position.recent:
            last_move = position.recent[-1]
            msg["move"] = coords.to_gtp(last_move.move)
        dbg("mg-position:%s" % json.dumps(msg, sort_keys=True))
    def test_parsing_9x9(self):
        """Checks SGF and GTP coordinate parsing and formatting."""
        # SGF text -> coordinate.
        for expected, sgf in [((0, 0), 'aa'), ((2, 0), 'ac'),
                              ((0, 2), 'ca'), (None, '')]:
            self.assertEqual(expected, coords.from_sgf(sgf))
        self.assertEqual('', coords.to_sgf(None))

        # SGF round trips, starting from either representation.
        for sgf in ('aa', 'sa'):
            self.assertEqual(sgf, coords.to_sgf(coords.from_sgf(sgf)))
        self.assertEqual((1, 17), coords.from_sgf(coords.to_sgf((1, 17))))

        # GTP text -> coordinate.
        for expected, gtp in [((8, 0), 'A1'), ((0, 0), 'A9'),
                              ((7, 2), 'C2'), ((7, 8), 'J2')]:
            self.assertEqual(expected, coords.from_gtp(gtp))

        # Coordinate -> GTP text.
        for expected, coord in [('J9', (0, 8)), ('A1', (8, 0))]:
            self.assertEqual(expected, coords.to_gtp(coord))
Beispiel #9
0
def play(network):
    """Plays out a self-play match, returning a MCTSPlayer object containing:
        - the final position
        - the n x 362 tensor of floats representing the mcts search probabilities
        - the n-ary tensor of floats representing the original value-net estimate
          where n is the number of moves in the game

    Args:
      network: object whose run(position) returns a (probabilities, value)
        pair; it is also handed to MCTSPlayer.
    """
    readouts = FLAGS.num_readouts  # defined in strategies.py

    # Resignation is disabled (threshold -1.0) for a random fraction of games
    # given by FLAGS.resign_disable_pct.
    resign_disabled = random.random() < FLAGS.resign_disable_pct
    player = MCTSPlayer(network,
                        resign_threshold=-1.0 if resign_disabled else None)
    player.initialize_game()

    # Must run this once at the start to expand the root node.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        # we want to do "X additional readouts", rather than "up to X readouts".
        target_readouts = player.root.N + readouts
        while player.root.N < target_readouts:
            player.tree_search()

        if FLAGS.verbose >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break

        player.play_move(player.pick_move())
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        # At verbosity 1 the timing line is printed only when the move number
        # is 9 modulo 10; from verbosity 2 it is printed after every move.
        periodic = FLAGS.verbose >= 1 and player.root.position.n % 10 == 9
        if (FLAGS.verbose >= 2) or periodic:
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, dur / readouts * 100.0, dur), flush=True)
        if FLAGS.verbose >= 3:
            played = coords.to_gtp(coords.from_flat(player.root.fmove))
            print("Played >>", played)

    if FLAGS.verbose >= 2:
        utils.dbg("%s: %.3f" % (player.result_string, player.root.Q))
        utils.dbg(player.root.position, player.root.position.score())

    return player
Beispiel #10
0
 def mvp_gg(self):
     """Returns the most visited path as space-separated GTP coordinates.

     The path is cut at the first node none of whose children has more than
     one visit.
     NOTE(review): the previous docstring described go-gui VAR output such as
     'b r3 w c17...', but no color prefix is emitted here - confirm.
     """
     moves = []
     for visited in self.most_visited_path_nodes():
         if not max(visited.child_N) > 1:
             break
         moves.append(coords.to_gtp(coords.from_flat(visited.fmove)))
     return ' '.join(moves)
    def test_topleft(self):
        """Checks that (0, 8) maps to and from 'ia' (SGF), 8 (flat), 'J9' (GTP)."""
        corner = (0, 8)
        decoders = ((coords.from_sgf, 'ia'),
                    (coords.from_flat, 8),
                    (coords.from_gtp, 'J9'))
        for decode, encoded in decoders:
            self.assertEqual(corner, decode(encoded))

        encoders = ((coords.to_sgf, 'ia'),
                    (coords.to_flat, 8),
                    (coords.to_gtp, 'J9'))
        for encode, expected in encoders:
            self.assertEqual(expected, encode(corner))
    def test_upperleft(self):
        """Checks that (0, 0) maps to and from 'aa' (SGF), 0 (flat), 'A9' (GTP)."""
        corner = (0, 0)
        decoders = ((coords.from_sgf, 'aa'),
                    (coords.from_flat, 0),
                    (coords.from_gtp, 'A9'))
        for decode, encoded in decoders:
            self.assertEqual(corner, decode(encoded))

        encoders = ((coords.to_sgf, 'aa'),
                    (coords.to_flat, 0),
                    (coords.to_gtp, 'A9'))
        for encode, expected in encoders:
            self.assertEqual(expected, encode(corner))
Beispiel #13
0
    def most_visited_path(self):
        """Returns the most visited path as a single line of text.

        Each node on the path contributes "<move> (<N>) ==> "; the line ends
        with the Q of the last node on the path, or of this node when the
        path is empty.
        """
        last = self
        steps = []
        for last in self.most_visited_path_nodes():
            move = coords.to_gtp(coords.from_flat(last.fmove))
            steps.append("%s (%d) ==> " % (move, last.N))
        return ''.join(steps) + "Q: {:.5f}\n".format(last.Q)
    def test_pass(self):
        """Checks that a pass (None) is parsed and formatted in every encoding."""
        pass_inputs = ((coords.from_sgf, ''),
                       (coords.from_sgf, 'tt'),
                       (coords.from_flat, 81),
                       (coords.from_gtp, 'pass'),
                       (coords.from_gtp, 'PASS'))
        for decode, encoded in pass_inputs:
            self.assertEqual(None, decode(encoded))

        pass_outputs = ((coords.to_sgf, ''),
                        (coords.to_flat, 81),
                        (coords.to_gtp, 'pass'))
        for encode, expected in pass_outputs:
            self.assertEqual(expected, encode(None))
Beispiel #15
0
 def maybe_add_child(self, fcoord):
     """Returns the child node for `fcoord`, creating it on first use.

     A new child is built by playing the move on both this node's position
     and its game_state, and is stored in self.children under `fcoord`.
     """
     if fcoord in self.children:
         return self.children[fcoord]

     move = coords.from_flat(fcoord)
     child_position = self.position.play_move(move)
     child_game_state = self.game_state.play_move(coords.to_gtp(move))
     self.children[fcoord] = MCTSNode(child_position,
                                      child_game_state,
                                      fmove=fcoord,
                                      parent=self)
     return self.children[fcoord]
def simulate(network, board=None, steps=20):
    """Plays the network's top policy move `steps` times, printing as it goes.

    Simulates rollout of network for given number of steps (to help
    understand the tactic).

    Args:
      network: object whose run(position) returns a (policy, value) pair.
      board: board passed to Position(); None is forwarded unchanged.
      steps: number of moves to play.
    """
    position = Position(board=board)
    for _ in range(steps):
        policy, _value = network.run(position)

        # Greedy choice: the flat index with the highest policy value.
        top_move = np.argmax(policy)
        top_coord = coords.from_flat(top_move)
        print('Best move', coords.to_gtp(top_coord))
        position = position.play_move(coords.from_flat(top_move))
        print(position)
Beispiel #17
0
    def cmd_genmove(self, color=None):
        """Picks, plays and returns the next move as a GTP string.

        Args:
          color: when not None, it is first passed to
            _accomodate_out_of_turn().

        Returns:
          "pass" when courtesy passing is enabled and the previous move was a
          pass, "resign" when the player decides to resign, otherwise the GTP
          coordinate of the move that was played.
        """
        if color is not None:
            self._accomodate_out_of_turn(color)

        if self._courtesy_pass:
            # If courtesy pass is True and the previous move was a pass, we'll
            # pass too, regardless of score or our opinion on the game.
            recent = self._player.get_position().recent
            if recent and recent[-1].move is None:
                return "pass"

        move = self._player.suggest_move(self._player.get_position())

        if self._player.should_resign():
            side_to_move = self._player.get_position().to_play
            self._player.set_result(-1 * side_to_move, was_resign=True)
            return "resign"

        self._player.play_move(move)
        if self._player.get_root().is_done():
            final_result = self._player.get_position().result()
            self._player.set_result(final_result, was_resign=False)
        return coords.to_gtp(move)
def _visited_move_shares(node):
    """Maps each visited child's GTP coordinate to its share of the visits.

    Children are taken in rank_children() order and collection stops at the
    first child that has no visits.
    """
    ranked = node.rank_children()
    soft_n = node.child_N / max(1, sum(node.child_N))
    shares = {}
    for flat in ranked:
        if node.child_N[flat] == 0:
            break
        shares[coords.to_gtp(coords.from_flat(flat))] = soft_n[flat]
    return shares


def play_mcts(network, board=None):
    """Shows a perturbation heatmap for the move picked by MCTS on `board`.

    A search is run on the given board and a move is picked. Then every
    occupied point (value 1 or -1) is cleared in turn, the search is re-run
    on the perturbed position, and the drop in the picked move's visit share
    is combined with cross_entropy_mcts() into a per-point score. Points
    where the picked move becomes illegal, or where cross_entropy_mcts()
    returns -1, are set to the maximum score before the map is thresholded
    and displayed.

    Args:
      network: network handed to get_mcts_player().
      board: N x N array of stones. It is indexed directly, so although the
        default is None an actual array has to be supplied.

    Returns:
      The player created by the most recent search.
    """
    pos = Position(board=board)

    player = get_mcts_player(network, pos)
    node = player.root
    original_moves = _visited_move_shares(node)

    a_b_coords = player.pick_move()
    a_b = coords.to_gtp(a_b_coords)

    print(original_moves)
    print("best action: ", a_b)
    print(node.position)
    p = original_moves[a_b]
    print(p)

    # The builtin float replaces the np.float alias, which no longer exists
    # in current NumPy releases.
    heatmap = np.zeros((N, N), dtype=float)
    for i in range(N):
        for j in range(N):
            # Only occupied points are perturbed.
            if board[i][j] != -1 and board[i][j] != 1:
                continue

            new_board = np.copy(board)
            new_board[i, j] = 0
            new_pos = perturb_position(pos, new_board)
            if not new_pos.is_move_legal(a_b_coords):
                heatmap[i, j] = -1.0
                continue

            player = get_mcts_player(network, new_pos)
            node = player.root
            print(node.position)
            new_moves = _visited_move_shares(node)
            new_a_b = coords.to_gtp(player.pick_move())
            # A move that received no visits after the perturbation has a
            # share of zero.
            new_p = new_moves.get(a_b, 0.)

            print("---------------------")
            print("New best move", new_a_b)
            print("p", new_p)
            print("------------------")

            K = cross_entropy_mcts(original_moves, new_moves, a_b)
            if K == -1:
                print("index", i, j)
                heatmap[i, j] = -1.0
                continue
            dP = p - new_p
            if dP > 0:
                heatmap[i, j] = 2.0*dP/(1. + dP*K)

    # Points flagged with -1 take the maximum score; everything below
    # max / 1.5 is then zeroed out.
    heatmap[heatmap == -1] = np.max(heatmap)
    heatmap[heatmap<np.max(heatmap)/1.5] = 0
    plt.imshow(heatmap, cmap='jet')
    plt.colorbar()
    plt.show()
    return player
def play_network(network, board=None):
    """Generates and saves saliency maps for the network's top move on `board`.

    Every occupied point (value 1 or -1) is cleared in turn and the network
    and an MCTS search are re-run on the perturbed position. Five figures are
    shown and written below save_path:
      - atariV.png: 0.5 * squared change in the value output, min-max scaled
      - atariP.png: 0.5 * norm of the change in the policy, min-max scaled
      - atari.png: the two maps above merged by saliency_combine()
      - deltaQ.png: positive drops in the top move's child Q
      - entropy.png: 2*dP / (1 + dP*K) for positive drops dP in the top
        move's policy value, with K from cross_entropy()

    Args:
      network: object whose run(position) returns a (policy, value) pair; it
        is also handed to get_mcts_player().
      board: N x N array of stones. It is indexed directly, so although the
        default is None an actual array has to be supplied.
    """
    pos = Position(board=board)

    policy, V = network.run(pos)

    best_move = np.argmax(policy)
    print("Best Move is", coords.to_gtp(coords.from_flat(best_move)))
    p = np.max(policy)

    player = get_mcts_player(network, pos)
    old_Q = player.root.child_Q[best_move]

    # The builtin float replaces the np.float alias, which no longer exists
    # in current NumPy releases.
    atariV = np.zeros([N, N], dtype=float)
    atariP = np.zeros([N, N], dtype=float)
    delQ = np.zeros([N, N], dtype=float)
    heatmap = np.zeros([N, N], dtype=float)
    for i in range(N):
        for j in range(N):
            # Only occupied points are perturbed.
            if board[i, j] != 1 and board[i, j] != -1:
                continue
            print(i, j)
            print("---------------------")

            new_board = np.copy(board)
            new_board[i, j] = 0
            new_pos = perturb_position(pos, new_board)
            new_policy, new_V = network.run(new_pos)
            new_p = new_policy[best_move]

            # Search the perturbed position: searching `pos` again would
            # compare the original position with itself.
            player = get_mcts_player(network, new_pos)
            new_Q = player.root.child_Q[best_move]

            atariV[i, j] = 0.5*((V - new_V)**2)
            atariP[i, j] = 0.5*np.linalg.norm(policy - new_policy)
            dP = p - new_p

            dQ = old_Q - new_Q
            K = cross_entropy(policy, new_policy, best_move)
            if dP > 0:
                heatmap[i, j] = 2*dP/(1 + dP*K)

            if dQ > 0:
                delQ[i, j] = dQ

    # Min-max scale both maps to [0, 1].
    atariV = (atariV - np.min(atariV))/(np.max(atariV) - np.min(atariV))
    atariP = (atariP - np.min(atariP))/(np.max(atariP) - np.min(atariP))

    frame = np.zeros((N, N, 3))
    frame = saliency_combine(atariV, frame, blur=256, channel=2)
    frame = saliency_combine(atariP, frame, blur=256, channel=0)

    plt.figure(1)
    plt.imshow(atariV, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'atariV.png')
    plt.show()

    plt.figure(2)
    plt.imshow(atariP, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'atariP.png')
    plt.show()

    plt.figure(3)
    plt.imshow(frame)
    plt.savefig(save_path + 'atari.png')
    plt.show()

    plt.figure(4)
    plt.imshow(delQ, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'deltaQ.png')
    plt.show()

    plt.figure(5)
    plt.imshow(heatmap, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'entropy.png')
    plt.show()
Beispiel #20
0
def extract_move_data(root_node, worker_id, completed_time, board_size):
    """Builds one record per move from the nodes that follow `root_node`.

    Args:
      root_node: SGF root node; the moves are read by following `.next`.
      worker_id: value copied into every record.
      completed_time: value copied into every record.
      board_size: board edge length, used to size the per-move tables when
        FLAGS.only_top_move is not set.

    Returns:
      A list of dicts, one per move node, in game order.

    Raises:
      ValueError: if a move node has neither a 'B' nor a 'W' property.
    """
    column_names = ["prior", "orig_prior", "N", "soft_N"]

    def get_row_data(debug_row):
        return [getattr(debug_row, field) for field in column_names]

    move_data = []
    move_num = 1
    current_node = root_node.next
    while current_node is not None:
        props = current_node.properties
        if 'B' in props:
            to_play = 1
            move_played = props['B'][0]
        elif 'W' in props:
            to_play = -1
            move_played = props['W'][0]
        else:
            # Fail loudly instead of dropping into a debugger and then
            # carrying on with the previous node's move.
            raise ValueError(
                'move node %d has neither a B nor a W property' % move_num)
        move_played = coords.to_flat(coords.from_sgf(move_played))
        post_Q, debug_rows = parse_comment_node(props['C'][0])

        if FLAGS.only_top_move:
            assert len(debug_rows) <= 1
            row_data = list(map(get_row_data, debug_rows))
        else:
            # One row per flat move index, zero-filled for moves that have
            # no debug row.
            row_data = [[0] * 4 for _ in range(board_size * board_size + 1)]
            for debug_row in debug_rows:
                row_data[debug_row.move] = get_row_data(debug_row)

        # Transpose the rows into one tuple per column.
        policy_prior, policy_prior_orig, mcts_visits, mcts_visits_norm = \
            zip(*row_data)

        move_data.append({
            'worker_id': worker_id,
            'completed_time': completed_time,
            'move_num': move_num,
            'turn_to_play': to_play,
            'move': move_played,
            'move_kgs': coords.to_gtp(coords.from_flat(move_played)),
            'prior_Q': None,
            'post_Q': post_Q,
            'policy_prior': policy_prior,
            'policy_prior_orig': policy_prior_orig,
            'mcts_visit_counts': mcts_visits,
            'mcts_visit_counts_norm': mcts_visits_norm,
        })
        move_num += 1
        current_node = current_node.next
    return move_data
Beispiel #21
0
 def _heatmap(self, sort_order, node, prop):
     """Returns up to 20 "<move> <value>" lines for the visited children.

     Children are taken in `sort_order`; only those with a positive visit
     count are listed. The value comes from the node attribute named `prop`,
     indexed by the child's flat coordinate.
     """
     rows = []
     for key in sort_order:
         if node.child_N[key] > 0:
             label = coords.to_gtp(coords.from_flat(key))
             rows.append("{!s:6} {}".format(label,
                                            node.__dict__.get(prop)[key]))
     return "\n".join(rows[:20])
Beispiel #22
0
 def fmt(move):
     """Formats a move as "<b|w>-<GTP coordinate>"; 'b' is used for go.BLACK."""
     if move.color == go.BLACK:
         prefix = 'b'
     else:
         prefix = 'w'
     return "{}-{}".format(prefix, coords.to_gtp(move.move))