Beispiel #1
0
 def describe(self):
     """Return a multi-line, human-readable dump of this node's search stats.

     Includes the node's Q value, the most visited path below it, and a
     table of per-child statistics for the 15 top-ranked moves.
     """
     # One entry per board point, plus one for the pass move.
     sort_order = list(range(self.board_size * self.board_size + 1))
     # Rank moves by visit count, breaking ties with action score.
     sort_order.sort(key=lambda i: (
         self.child_N[i], self.child_action_score[i]), reverse=True)
     # Empirical visit distribution, and how far search drifted from the
     # prior. NOTE(review): p_rel divides by child_prior — assumes priors
     # are nonzero; confirm against the prior initialization.
     soft_n = self.child_N / sum(self.child_N)
     p_delta = soft_n - self.child_prior
     p_rel = p_delta / self.child_prior
     # Dump out some statistics
     output = []
     output.append('{q:.4f}\n'.format(q=self.Q))
     output.append(self.most_visited_path())
     output.append(
         '''move:  action      Q      U      P    P-Dir    N  soft-N
     p-delta  p-rel\n''')
     output.append(
         '\n'.join([
                       '''{!s:6}: {: .3f}, {: .3f}, {:.3f}, {:.3f}, {:.3f}, {:4d} {:.4f}
         {: .5f} {: .2f}'''.format(
                           coords.to_kgs(self.board_size, coords.from_flat(
                               self.board_size, key)),
                           self.child_action_score[key],
                           self.child_Q[key],
                           self.child_U[key],
                           self.child_prior[key],
                           self.original_prior[key],
                           int(self.child_N[key]),
                           soft_n[key],
                           p_delta[key],
                           p_rel[key])
                       for key in sort_order][:15]))
     return ''.join(output)
Beispiel #2
0
 def mvp_gg(self):
     """ Returns most visited path in go-gui VAR format e.g. 'b r3 w c17..."""
     moves = []
     current = self
     # Follow the most-visited child until we reach a leaf, or a node
     # whose best child was visited at most once.
     while current.children and max(current.child_N) > 1:
         best = np.argmax(current.child_N)
         current = current.children[best]
         kgs_move = coords.to_kgs(
             self.board_size, coords.from_flat(self.board_size, current.fmove))
         moves.append('{}'.format(kgs_move))
     return ' '.join(moves)
Beispiel #3
0
 def most_visited_path(self):
     """Trace the most-visited line of play starting from this node.

     Returns:
       A string of the form 'D4 (123) ==> Q16 (88) ==> ... Q: 0.12345\n',
       following the child with the highest visit count at each step.
       If the best-visited move was never expanded into a child node,
       'GAME END' is appended and the walk stops.
     """
     node = self
     output = []
     while node.children:
         next_kid = np.argmax(node.child_N)
         child = node.children.get(next_kid)
         if child is None:
             # Most-visited move has no expanded child node.
             output.append('GAME END')
             break
         # BUG FIX: only rebind `node` to a real child; previously `node`
         # became None here, and node.Q below raised AttributeError.
         node = child
         output.append('{} ({}) ==> '.format(
             coords.to_kgs(
                 self.board_size,
                 coords.from_flat(self.board_size, node.fmove)), node.N))
     output.append('Q: {:.5f}\n'.format(node.Q))
     return ''.join(output)
Beispiel #4
0
def play(board_size,
         network,
         readouts,
         resign_threshold,
         simultaneous_leaves,
         verbosity=0):
    """Plays out a self-play match.

  Args:
    board_size: the go board size
    network: the DualNet model
    readouts: the number of readouts in MCTS
    resign_threshold: the threshold to resign at in the match
    simultaneous_leaves: the number of simultaneous leaves in MCTS
    verbosity: the verbosity of the self-play match

  Returns:
    the MCTSPlayer after the game completes; the final position, the mcts
    search probabilities, and the value-net estimates can be read from the
    returned player's game data.
  """
    player = MCTSPlayer(board_size,
                        network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=simultaneous_leaves)
    # Disable resign in 5% of games so some games are played to the end,
    # which allows resign-threshold calibration.
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        if (verbosity >= 2) or (verbosity >= 1
                                and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                  (player.root.position.n, readouts, dur / readouts * 100.0,
                   dur))
        if verbosity >= 3:
            # BUG FIX: coords.to_kgs / coords.from_flat take the board size
            # as their first argument everywhere else; it was missing here.
            print("Played >>",
                  coords.to_kgs(board_size,
                                coords.from_flat(board_size,
                                                 player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position,
              player.root.position.score(),
              file=sys.stderr)

    return player
Beispiel #5
0
    def play_move(self, c, color=None, mutate=False):
        """Obeys CGOS Rules of Play.

    In short:
    No suicides
    Chinese/area scoring
    Positional superko (this is very crudely approximate at the moment.)

    Args:
      c: the coordinate to play at, or None to pass.
      color: the color of the player to play; defaults to self.to_play.
      mutate: if True, apply the move to this Position in place; otherwise
        play on a deep copy and leave this Position untouched.

    Returns:
      The position after the move is played.

    Raises:
      IllegalMove: if the input c is an illegal move.
    """
        if color is None:
            color = self.to_play

        pos = self if mutate else copy.deepcopy(self)

        # A None coordinate is a pass.
        if c is None:
            pos = pos.pass_move(mutate=mutate)
            return pos

        if not self.is_move_legal(c):
            raise IllegalMove('{} move at {} is illegal: \n{}'.format(
                'Black' if self.to_play == BLACK else 'White',
                coords.to_kgs(self.board_size, c), self))

        # Check, before placing the stone, whether c is surrounded by a
        # single color — the precondition for this move creating a ko.
        potential_ko = is_koish(self.board_size, self.board, c)

        # Place the stone, remove any opponent groups it captures.
        place_stones(pos.board, color, [c])
        captured_stones = pos.lib_tracker.add_stone(color, c)
        place_stones(pos.board, EMPTY, captured_stones)

        opp_color = -1 * color

        # Delta = new board - old board: the played stone contributes
        # `color` at c, and each captured point goes from -color to EMPTY,
        # which is also a delta of +color — hence captures are recorded
        # with `color` here.
        new_board_delta = np.zeros([self.board_size, self.board_size],
                                   dtype=np.int8)
        new_board_delta[c] = color
        place_stones(new_board_delta, color, captured_stones)

        # A single-stone capture of a koish point creates a ko: the
        # opponent may not immediately retake at that point.
        if len(captured_stones) == 1 and potential_ko == opp_color:
            new_ko = list(captured_stones)[0]
        else:
            new_ko = None

        # Credit captures to the side that just played.
        if pos.to_play == BLACK:
            new_caps = (pos.caps[0] + len(captured_stones), pos.caps[1])
        else:
            new_caps = (pos.caps[0], pos.caps[1] + len(captured_stones))

        pos.n += 1
        pos.caps = new_caps
        pos.ko = new_ko
        pos.recent += (PlayerMove(color, c),)

        # keep a rolling history of last 7 deltas - that's all we'll need to
        # extract the last 8 board states.
        pos.board_deltas = np.concatenate((
            new_board_delta.reshape(1, self.board_size, self.board_size),
            pos.board_deltas[:6]))
        pos.to_play *= -1
        return pos
Beispiel #6
0
 def heatmap(self, sort_order, node, prop):
     """Format `prop` for the first 20 visited moves in sort_order, one per line."""
     values = node.__dict__.get(prop)
     rows = []
     for key in sort_order:
         # Only include moves that were actually visited.
         if node.child_N[key] > 0:
             rows.append('{!s:6} {}'.format(
                 coords.to_kgs(coords.from_flat(key)), values[key]))
     return '\n'.join(rows[:20])
Beispiel #7
0
 def fmt(move):
     """Render a move as '<color>-<kgs coordinate>' (color 1 rendered as 'b')."""
     color_char = 'b' if move.color == 1 else 'w'
     kgs = coords.to_kgs(self.board_size, move.move)
     return '{}-{}'.format(color_char, kgs)