예제 #1
0
 def test_flatten(self):
     """Check flat-index <-> (row, col) conversions, including round trips."""
     # (row, col) pairs and the flat index each is expected to produce.
     for point, expected_flat in (((0, 0), 0), ((0, 3), 3), ((3, 0), 27)):
         self.assertEqual(coords.to_flat(point), expected_flat)
     # Flat indices and the (row, col) pair each is expected to produce.
     for flat, expected_point in ((27, (3, 0)), (10, (1, 1)), (80, (8, 8))):
         self.assertEqual(coords.from_flat(flat), expected_point)
     # Converting there and back must give the starting value.
     there_and_back = coords.to_flat(coords.from_flat(10))
     self.assertEqual(there_and_back, 10)
     back_and_there = coords.from_flat(coords.to_flat((5, 4)))
     self.assertEqual(back_and_there, (5, 4))
예제 #2
0
 def test_flatten(self):
   """Check flat <-> (row, col) conversions at the test board size."""
   size = utils_test.BOARD_SIZE
   # (row, col) pairs and the flat index each is expected to produce.
   for point, expected_flat in (((0, 0), 0), ((0, 3), 3), ((3, 0), 27)):
     self.assertEqual(coords.to_flat(size, point), expected_flat)
   # Flat indices and the (row, col) pair each is expected to produce.
   for flat, expected_point in ((27, (3, 0)), (10, (1, 1)), (80, (8, 8))):
     self.assertEqual(coords.from_flat(size, flat), expected_point)
   # Converting there and back must give the starting value.
   there_and_back = coords.to_flat(size, coords.from_flat(size, 10))
   self.assertEqual(there_and_back, 10)
   back_and_there = coords.from_flat(size, coords.to_flat(size, (5, 4)))
   self.assertEqual(back_and_there, (5, 4))
예제 #3
0
 def test_proper_move_transform(self):
     """Check that symmetries move the flat move vector like the board.

     The move vector holds go.N ** 2 + 1 entries (362 = 19*19 + 1 on a
     19x19 board); how it is reinterpreted while a symmetry is applied
     must be consistent with coords.from_flat.
     """
     flat_moves = np.arange(go.N ** 2 + 1)
     # Board whose cell at coords.from_flat(k) stores k itself.
     index_board = np.zeros([go.N, go.N])
     for flat in range(go.N ** 2):
         index_board[coords.from_flat(flat)] = flat
     for sym in symmetries.SYMMETRIES:
         with self.subTest(symmetry=sym):
             moved = apply_p(sym, flat_moves)
             board = apply_f(sym, index_board)
             # The trailing entry of the move vector is not compared.
             for new_flat, old_flat in enumerate(moved[:-1]):
                 on_board = board[coords.from_flat(new_flat)]
                 self.assertEqual(old_flat, on_board)
예제 #4
0
 def describe(self):
     """Return a multi-line text dump of this node's search statistics.

     The dump holds the node's Q, the most visited path, a column header
     and one row for each of the 15 highest-ranked moves. Moves are ranked
     by visit count, with the action score as tie-breaker, highest first.
     """
     ranked = sorted(
         range(go.N * go.N + 1),
         key=lambda i: (self.child_N[i], self.child_action_score[i]),
         reverse=True)
     # Divide by at least 1 so a node without visits reports zeros
     # instead of 0/0.
     soft_n = self.child_N / max(1, sum(self.child_N))
     p_delta = soft_n - self.child_prior
     # NOTE(review): a zero entry in child_prior divides by zero here.
     p_rel = p_delta / self.child_prior
     row_format = ("{!s:6}: {: .3f}, {: .3f}, {:.3f}, {:.3f}, {:.3f}, "
                   "{:4d} {:.4f} {: .5f} {: .2f}")
     # Only the rows that are shown get formatted.
     rows = [
         row_format.format(
             coords.to_kgs(coords.from_flat(key)),
             self.child_action_score[key],
             self.child_Q[key],
             self.child_U[key],
             self.child_prior[key],
             self.original_prior[key],
             int(self.child_N[key]),
             soft_n[key],
             p_delta[key],
             p_rel[key])
         for key in ranked[:15]
     ]
     # Dump out some statistics
     return ''.join([
         "{q:.4f}\n".format(q=self.Q),
         self.most_visited_path(),
         "move:  action      Q      U      P    P-Dir    N  soft-N  p-delta  p-rel\n",
         "\n".join(rows),
     ])
예제 #5
0
 def maybe_add_child(self, fcoord):
     """Return the child node for fcoord, creating it first when absent."""
     if fcoord in self.children:
         return self.children[fcoord]
     # No child yet: play the move from this node's position and wrap the
     # resulting position in a new node linked back to this one.
     next_position = self.position.play_move(coords.from_flat(fcoord))
     child = MCTSNode(next_position, fmove=fcoord, parent=self)
     self.children[fcoord] = child
     return child
예제 #6
0
  def test_pass(self):
    """A pass is None as a coordinate; check each notation both ways."""
    size = utils_test.BOARD_SIZE
    # (parser, formatter, encoded pass) for the notations that take a size.
    sized = (
        (coords.from_flat, coords.to_flat, 81),
        (coords.from_kgs, coords.to_kgs, 'pass'),
        (coords.from_pygtp, coords.to_pygtp, (0, 0)),
    )
    self.assertEqual(coords.from_sgf(''), None)
    for parse, _, encoded in sized:
      self.assertEqual(parse(size, encoded), None)

    self.assertEqual(coords.to_sgf(None), '')
    for _, render, encoded in sized:
      self.assertEqual(render(size, None), encoded)
예제 #7
0
  def test_topleft(self):
    """Check every notation of the point (0, 8), in both directions."""
    size = utils_test.BOARD_SIZE
    point = (0, 8)
    # (parser, formatter, encoded point) for the notations that take a size.
    sized = (
        (coords.from_flat, coords.to_flat, 8),
        (coords.from_kgs, coords.to_kgs, 'J9'),
        (coords.from_pygtp, coords.to_pygtp, (9, 9)),
    )
    self.assertEqual(coords.from_sgf('ia'), point)
    for parse, _, encoded in sized:
      self.assertEqual(parse(size, encoded), point)

    self.assertEqual(coords.to_sgf(point), 'ia')
    for _, render, encoded in sized:
      self.assertEqual(render(size, point), encoded)
예제 #8
0
  def test_upperleft(self):
    """Check every notation of the point (0, 0), in both directions."""
    size = utils_test.BOARD_SIZE
    point = (0, 0)
    # (parser, formatter, encoded point) for the notations that take a size.
    sized = (
        (coords.from_flat, coords.to_flat, 0),
        (coords.from_kgs, coords.to_kgs, 'A9'),
        (coords.from_pygtp, coords.to_pygtp, (1, 9)),
    )
    self.assertEqual(coords.from_sgf('aa'), point)
    for parse, _, encoded in sized:
      self.assertEqual(parse(size, encoded), point)

    self.assertEqual(coords.to_sgf(point), 'aa')
    for _, render, encoded in sized:
      self.assertEqual(render(size, point), encoded)
예제 #9
0
 def mvp_gg(self):
     """Return the most visited path as space-separated KGS move strings.

     Starting at this node, step repeatedly to the child with the highest
     visit count. The walk stops at a node without children or one whose
     highest visit count is not above 1.
     """
     moves = []
     current = self
     while current.children and max(current.child_N) > 1:
         current = current.children[np.argmax(current.child_N)]
         kgs_move = coords.to_kgs(coords.from_flat(current.fmove))
         moves.append("%s" % kgs_move)
     return ' '.join(moves)
예제 #10
0
    def test_pass(self):
        """A pass is None as a coordinate; check each notation both ways."""
        # (parser, formatter, encoded pass) per notation.
        notations = (
            (coords.from_sgf, coords.to_sgf, ''),
            (coords.from_flat, coords.to_flat, 81),
            (coords.from_kgs, coords.to_kgs, 'pass'),
            (coords.from_pygtp, coords.to_pygtp, (0, 0)),
        )
        for parse, _, encoded in notations:
            self.assertEqual(parse(encoded), None)

        for _, render, encoded in notations:
            self.assertEqual(render(None), encoded)
예제 #11
0
    def test_topleft(self):
        """Check every notation of the point (0, 8), in both directions."""
        point = (0, 8)
        # (parser, formatter, encoded point) per notation.
        notations = (
            (coords.from_sgf, coords.to_sgf, 'ia'),
            (coords.from_flat, coords.to_flat, 8),
            (coords.from_kgs, coords.to_kgs, 'J9'),
            (coords.from_pygtp, coords.to_pygtp, (9, 9)),
        )
        for parse, _, encoded in notations:
            self.assertEqual(parse(encoded), point)

        for _, render, encoded in notations:
            self.assertEqual(render(point), encoded)
예제 #12
0
    def test_upperleft(self):
        """Check every notation of the point (0, 0), in both directions."""
        point = (0, 0)
        # (parser, formatter, encoded point) per notation.
        notations = (
            (coords.from_sgf, coords.to_sgf, 'aa'),
            (coords.from_flat, coords.to_flat, 0),
            (coords.from_kgs, coords.to_kgs, 'A9'),
            (coords.from_pygtp, coords.to_pygtp, (1, 9)),
        )
        for parse, _, encoded in notations:
            self.assertEqual(parse(encoded), point)

        for _, render, encoded in notations:
            self.assertEqual(render(point), encoded)
예제 #13
0
    def test_legal_moves(self):
        """Check single-move and bulk legality for both colour assignments."""
        board = test_utils.load_board('''
            .O.O.XOX.
            O..OOOOOX
            ......O.O
            OO.....OX
            XO.....X.
            .O.......
            OX.....OO
            XX...OOOX
            .....O.X.
        ''')
        illegal_moves = coords_from_kgs_set('A9 E9 J9')
        legal_moves = coords_from_kgs_set('A4 G1 J1 H7') | {None}
        # The second pass flips the colors (negated board, other player);
        # everything must still be (il)legal.
        for stones, player in ((board, BLACK), (-board, WHITE)):
            position = Position(board=stones, to_play=player)
            for move in illegal_moves:
                with self.subTest(type='illegal', move=move):
                    self.assertFalse(position.is_move_legal(move))
            for move in legal_moves:
                with self.subTest(type='legal', move=move):
                    self.assertTrue(position.is_move_legal(move))
            # The bulk legality test must agree with the move-by-move test.
            for flat, bulk_legal in enumerate(position.all_legal_moves()):
                with self.subTest(type='bulk', move=coords.from_flat(flat)):
                    single_legal = position.is_move_legal(
                        coords.from_flat(flat))
                    self.assertEqual(bulk_legal, single_legal)
예제 #14
0
 def most_visited_path(self):
     """Return a one-line description of the most visited path from here.

     Starting at this node, step repeatedly to the child with the highest
     visit count, emitting "<move> (<N>) ==> " for each step. The walk
     stops at a node without children, or emits "GAME END" when the most
     visited move has no child node. The line ends with the Q of the last
     node reached.
     """
     node = self
     output = []
     while node.children:
         next_kid = np.argmax(node.child_N)
         child = node.children.get(next_kid)
         if child is None:
             # Leave `node` on the last real node so its Q can still be
             # reported below; rebinding it to None made `node.Q` fail.
             output.append("GAME END")
             break
         node = child
         output.append("%s (%d) ==> " % (
             coords.to_kgs(coords.from_flat(node.fmove)), node.N))
     output.append("Q: {:.5f}\n".format(node.Q))
     return ''.join(output)
예제 #15
0
 def most_visited_path(self):
     """Describe the most visited line of play below this node.

     Each step to the child with the highest visit count contributes
     "<move> (<N>) ==> ". If the most visited move has no child node the
     text "GAME END" is added and the walk stops. The Q of the last node
     reached closes the line.
     """
     node = self
     output = []
     while node.children:
         next_kid = np.argmax(node.child_N)
         child = node.children.get(next_kid)
         if child is None:
             # `node` must stay on the last real node: the final line
             # reads node.Q, which raised AttributeError when node was
             # rebound to None here.
             output.append("GAME END")
             break
         node = child
         output.append(
             "%s (%d) ==> " %
             (coords.to_kgs(coords.from_flat(node.fmove)), node.N))
     output.append("Q: {:.5f}\n".format(node.Q))
     return ''.join(output)
예제 #16
0
    def pick_move(self):
        '''Choose the move to play from the root's visit counts.

        Once the root position's move number has reached temp_threshold the
        most visited move is returned. Before that, a move is sampled with
        probability proportional to its visit count.'''
        root = self.root
        if root.position.n >= self.temp_threshold:
            return coords.from_flat(np.argmax(root.child_N))
        cumulative = root.child_N.cumsum()
        # Normalising by the second-to-last entry makes the distribution
        # reach 1.0 there, so a draw below 1.0 never lands on the final
        # index. Prevents passing via softpick.
        cumulative /= cumulative[-2]
        fcoord = cumulative.searchsorted(random.random())
        assert root.child_N[fcoord] != 0
        return coords.from_flat(fcoord)
예제 #17
0
def apply(action, history):
    """Append the board that results from playing `action` to `history`.

    action: legal move, given as flat coordinates; a value equal to the
        size of the board means pass and leaves the board unchanged.
    history: history of the game so far, as a list of boards; the new
        board is appended in place.
    """
    board = history[-1].copy()
    to_play = -1 if len(history) % 2 == 0 else 1
    # Compare by value: `is not` tests object identity, which is not a
    # reliable equality test for integers.
    if action != board.size:
        p = go.Position(board=board, to_play=to_play)
        p.play_move(coords.from_flat(action), mutate=True)
        if p.ko is not None:
            # The ko point is marked on the board with the value 4.
            board[p.ko] = 4
    history.append(board)
예제 #18
0
    def pick_move(self):
        '''Choose the move to play from the root's visit counts.

        Once the root position's move number exceeds temp_threshold the most
        visited move is returned. Until then, a move is sampled with
        probability proportional to its visit count.'''
        root = self.root
        if root.position.n > self.temp_threshold:
            return coords.from_flat(np.argmax(root.child_N))
        # Turn the visit counts into a cumulative distribution ending at
        # 1.0 and locate a uniform draw in it.
        cumulative = root.child_N.cumsum()
        cumulative /= cumulative[-1]
        fcoord = cumulative.searchsorted(random.random())
        assert root.child_N[fcoord] != 0
        return coords.from_flat(fcoord)
예제 #19
0
 def most_visited_path(self):
     """Return a one-line description of the most visited path from here.

     Each step to the child with the highest visit count contributes
     '<move> (<N>) ==> '. If the most visited move has no child node,
     'GAME END' is added and the walk stops. The Q of the last node
     reached closes the line.
     """
     node = self
     output = []
     while node.children:
         next_kid = np.argmax(node.child_N)
         child = node.children.get(next_kid)
         if child is None:
             # Keep `node` on the last real node; rebinding it to None
             # made the final `node.Q` raise AttributeError.
             output.append('GAME END')
             break
         node = child
         output.append('{} ({}) ==> '.format(
             coords.to_kgs(self.board_size,
                           coords.from_flat(self.board_size, node.fmove)),
             node.N))
     output.append('Q: {:.5f}\n'.format(node.Q))
     return ''.join(output)
예제 #20
0
파일: mcts.py 프로젝트: ALISCIFP/models
 def most_visited_path(self):
   """Describe the most visited line of play below this node.

   Each step to the child with the highest visit count contributes
   '<move> (<N>) ==> '. If the most visited move has no child node,
   'GAME END' is added and the walk stops. The Q of the last node reached
   closes the line.
   """
   node = self
   output = []
   while node.children:
     next_kid = np.argmax(node.child_N)
     child = node.children.get(next_kid)
     if child is None:
       # Keep `node` on the last real node; rebinding it to None made the
       # final `node.Q` raise AttributeError.
       output.append('GAME END')
       break
     node = child
     output.append('{} ({}) ==> '.format(
         coords.to_kgs(
             self.board_size,
             coords.from_flat(self.board_size, node.fmove)), node.N))
   output.append('Q: {:.5f}\n'.format(node.Q))
   return ''.join(output)
def extract_move_data(root_node, worker_id, completed_time, board_size):
    """Collect one record per move from the nodes that follow `root_node`.

    Args:
        root_node: game-tree root; the nodes after it are followed through
            their `next` attribute until it is None.
        worker_id: stored unchanged in every record.
        completed_time: stored unchanged in every record.
        board_size: side length of the board; every per-move list holds
            board_size * board_size + 1 entries.

    Returns:
        A list of dicts, one per node, in the order the nodes are visited.

    Raises:
        ValueError: if a node has neither a 'B' nor a 'W' property.
    """
    num_entries = board_size * board_size + 1
    move_data = []
    move_num = 1
    current_node = root_node.next
    while current_node is not None:
        props = current_node.properties
        if 'B' in props:
            to_play = 1
            move_played = props['B'][0]
        elif 'W' in props:
            to_play = -1
            move_played = props['W'][0]
        else:
            # Fail loudly instead of dropping into the debugger and then
            # continuing with an unset (or stale) move.
            raise ValueError(
                "node %d has neither a 'B' nor a 'W' property" % move_num)
        move_played = coords.to_flat(coords.from_sgf(move_played))
        post_Q, debug_rows = parse_comment_node(props['C'][0])
        # Moves without a debug row keep the default of 0.
        policy_prior = [0] * num_entries
        policy_prior_orig = [0] * num_entries
        mcts_visit_counts = [0] * num_entries
        mcts_visit_counts_norm = [0] * num_entries
        for debug_row in debug_rows:
            move = debug_row.move
            policy_prior[move] = debug_row.prior
            policy_prior_orig[move] = debug_row.orig_prior
            mcts_visit_counts[move] = debug_row.N
            mcts_visit_counts_norm[move] = debug_row.soft_N

        move_data.append({
            'worker_id': worker_id,
            'completed_time': completed_time,
            'move_num': move_num,
            'turn_to_play': to_play,
            'move': move_played,
            'move_kgs': coords.to_kgs(coords.from_flat(move_played)),
            'prior_Q': None,
            'post_Q': post_Q,
            'policy_prior': policy_prior,
            'policy_prior_orig': policy_prior_orig,
            'mcts_visit_counts': mcts_visit_counts,
            'mcts_visit_counts_norm': mcts_visit_counts_norm,
        })
        move_num += 1
        current_node = current_node.next
    return move_data
예제 #22
0
    def run(self, model, position):
        """Run self.number_of_sim search simulations and return the root.

        position is expected to be a Position from go.py. The root is
        expanded once with the model's masked, renormalised policy. Each
        simulation then follows select_child() down to an unexpanded node,
        plays the selected action from its parent's position, evaluates or
        scores the result, and backpropagates the value along the path.
        """
        def evaluate(state):
            # Same call order as before: policy, value, then legality mask.
            boards, player_caps, opponent_caps = gamesToData([[state, 1]])
            priors = model.callPol(boards, player_caps, opponent_caps)[0]
            estimate = model.callVal(boards, player_caps, opponent_caps)[0]
            priors = priors * state.all_legal_moves()  # mask invalid moves
            priors /= np.sum(priors)
            return priors, estimate

        root = Node(0, position.to_play)
        root_priors, value = evaluate(position)
        root.expand(position, root_priors)

        for _ in range(self.number_of_sim):
            node = root
            search_path = [node]
            while node.expanded():
                action, node = node.select_child()
                search_path.append(node)

            # The leaf's position is produced by playing the last selected
            # action from its parent's position.
            parent = search_path[-2]
            next_position = parent.position.play_move(
                coords.from_flat(action))
            if next_position.is_game_over():
                # The result is negated unless the player to move is 1.
                value = (next_position.result()
                         if next_position.to_play == 1
                         else next_position.result()*-1)
            else:
                leaf_priors, value = evaluate(next_position)
                node.expand(next_position, leaf_priors)

            self.backpropagate(search_path, value, next_position.to_play)
        return root
예제 #23
0
 def _from_flat(flat_coords):
     """Convert flat_coords with coords.from_flat at the test board size."""
     size = utils_test.BOARD_SIZE
     return coords.from_flat(size, flat_coords)
예제 #24
0
 def heatmap(self, sort_order, node, prop):
     """Return up to 20 "<move> <value>" lines for the visited moves.

     Moves are taken from sort_order in order, keeping those whose visit
     count in node.child_N is above zero. The value shown for a move is
     that move's entry of the node attribute named by prop.
     """
     rows = []
     for key in sort_order:
         if node.child_N[key] > 0:
             label = coords.to_kgs(coords.from_flat(key))
             rows.append(
                 "{!s:6} {}".format(label, node.__dict__.get(prop)[key]))
     return "\n".join(rows[:20])
예제 #25
0
def extract_move_data(root_node, worker_id, completed_time, board_size):
    """Collect one record per move from the nodes that follow `root_node`.

    Args:
        root_node: game-tree root; the nodes after it are followed through
            their `next` attribute until it is None.
        worker_id: stored unchanged in every record.
        completed_time: stored unchanged in every record.
        board_size: side length of the board; without FLAGS.only_top_move
            every per-move tuple holds board_size * board_size + 1 entries.

    Returns:
        A list of dicts, one per node, in the order the nodes are visited.

    Raises:
        ValueError: if a node has neither a 'B' nor a 'W' property.
    """
    column_names = ["prior", "orig_prior", "N", "soft_N"]

    def get_row_data(debug_row):
        return [getattr(debug_row, field) for field in column_names]

    move_data = []
    move_num = 1
    current_node = root_node.next
    while current_node is not None:
        props = current_node.properties
        if 'B' in props:
            to_play = 1
            move_played = props['B'][0]
        elif 'W' in props:
            to_play = -1
            move_played = props['W'][0]
        else:
            # Fail loudly instead of dropping into the debugger and then
            # continuing with an unset (or stale) move.
            raise ValueError(
                "node %d has neither a 'B' nor a 'W' property" % move_num)
        move_played = coords.to_flat(coords.from_sgf(move_played))
        post_Q, debug_rows = parse_comment_node(props['C'][0])

        if FLAGS.only_top_move:
            assert len(debug_rows) <= 1
            row_data = [get_row_data(row) for row in debug_rows]
        else:
            # Moves without a debug row keep a row of zeros.
            row_data = [[0] * 4 for _ in range(board_size * board_size + 1)]
            for debug_row in debug_rows:
                row_data[debug_row.move] = get_row_data(debug_row)

        # Transpose the rows into one tuple per column.
        # NOTE(review): with only_top_move and no debug rows, row_data is
        # empty and this unpacking raises ValueError.
        policy_prior, policy_prior_orig, mcts_visits, mcts_visits_norm = \
            zip(*row_data)

        move_data.append({
            'worker_id': worker_id,
            'completed_time': completed_time,
            'move_num': move_num,
            'turn_to_play': to_play,
            'move': move_played,
            'move_kgs': coords.to_gtp(coords.from_flat(move_played)),
            'prior_Q': None,
            'post_Q': post_Q,
            'policy_prior': policy_prior,
            'policy_prior_orig': policy_prior_orig,
            'mcts_visit_counts': mcts_visits,
            'mcts_visit_counts_norm': mcts_visits_norm,
        })
        move_num += 1
        current_node = current_node.next
    return move_data
예제 #26
0
 def _heatmap(self, sort_order, node, prop):
     return "\n".join([
         "{!s:6} {}".format(coords.to_kgs(coords.from_flat(key)),
                            node.__dict__.get(prop)[key])
         for key in sort_order if node.child_N[key] > 0
     ][:20])
예제 #27
0
def play_mcts(network, board=None):
    """Show a saliency heat map for the move MCTS picks on `board`.

    The visit distribution of an MCTS player on the given position is
    compared with the distribution obtained after removing each stone in
    turn. Each removed stone gets a score based on how much the share of
    the originally picked move drops. The resulting map is displayed with
    matplotlib.

    Args:
        network: passed on to get_mcts_player.
        board: board to analyse, indexed as board[i][j] with stones stored
            as 1 and -1. NOTE(review): the default None is accepted by
            the signature but fails at the first board[i][j] access.

    Returns:
        The most recently created MCTS player.
    """
    pos = Position(board=board)

    player = get_mcts_player(network, pos)
    node = player.root
    children = node.rank_children()
    soft_n = node.child_N / max(1, sum(node.child_N))

    original_moves = {}

    # Builtin float: the np.float alias was removed in NumPy 1.24.
    heatmap = np.zeros((N, N), dtype=float)
    for i in children:
        # Stop at the first unvisited child.
        if node.child_N[i] == 0:
            break
        original_moves[coords.to_gtp(coords.from_flat(i))] = soft_n[i]

    a_b_coords = player.pick_move()
    a_b = coords.to_gtp(a_b_coords)

    print(original_moves)
    print("best action: ", a_b)
    print(node.position)
    p = original_moves[a_b]
    print(p)

    for i in range(N):
        for j in range(N):
            # Only points holding a stone are perturbed.
            if board[i][j] != -1 and board[i][j] != 1:
                continue
            new_board = np.copy(board)
            new_board[i, j] = 0
            new_pos = perturb_position(pos, new_board)
            if not new_pos.is_move_legal(a_b_coords):
                # -1 marks cells that are set to the map maximum below.
                heatmap[i, j] = -1.0
                continue

            player = get_mcts_player(network, new_pos)
            node = player.root
            print(node.position)
            new_moves = {}
            children = node.rank_children()
            soft_n = node.child_N / max(1, sum(node.child_N))
            for ch in children:
                if node.child_N[ch] == 0:
                    break
                new_moves[coords.to_gtp(coords.from_flat(ch))] = soft_n[ch]

            new_a_b = coords.to_gtp(player.pick_move())
            print("---------------------")
            # A move the perturbed search never visited has share 0.
            new_p = new_moves.get(a_b, 0.)
            print("New best move", new_a_b)
            print("p", new_p)
            print("------------------")

            K = cross_entropy_mcts(original_moves, new_moves, a_b)
            if K == -1:
                print("index", i, j)
                heatmap[i, j] = -1.0
                continue
            dP = p - new_p
            if dP > 0:
                heatmap[i, j] = 2.0*dP/(1. + dP*K)

    # Marked cells take the maximum score; scores below max/1.5 are zeroed.
    heatmap[heatmap == -1] = np.max(heatmap)
    heatmap[heatmap<np.max(heatmap)/1.5] = 0
    plt.imshow(heatmap, cmap='jet')
    plt.colorbar()
    plt.show()
    return player
예제 #28
0
def play_network(network, board=None):
    '''Generate, save and show saliency maps for the best move on `board`.

    Each stone is removed in turn and the network (and an MCTS player) is
    re-run on the perturbed position. Five figures are saved under
    save_path and shown: the value-difference map (atariV.png), the
    policy-difference map (atariP.png), their combination (atari.png), the
    drop in Q of the best move (deltaQ.png) and the policy-based score
    (entropy.png).

    Args:
        network: object whose run(position) returns (policy, value); also
            passed on to get_mcts_player.
        board: board to analyse, indexed as board[i, j] with stones stored
            as 1 and -1. NOTE(review): the default None is accepted by
            the signature but fails at the first board[i, j] access.
    '''
    pos = Position(board=board)

    policy, V = network.run(pos)

    best_move = np.argmax(policy)
    print("Best Move is", coords.to_gtp(coords.from_flat(best_move)))
    p = np.max(policy)

    player = get_mcts_player(network, pos)
    node = player.root

    old_Q = node.child_Q[best_move]

    # Builtin float: the np.float alias was removed in NumPy 1.24.
    atariV = np.zeros([N, N], dtype=float)
    atariP = np.zeros([N, N], dtype=float)
    delQ = np.zeros([N, N], dtype=float)
    heatmap = np.zeros([N, N], dtype=float)
    for i in range(N):
        for j in range(N):
            # Only points holding a stone are perturbed.
            if board[i, j] != 1 and board[i, j] != -1:
                continue
            print(i, j)
            print("---------------------")

            new_board = np.copy(board)
            new_board[i, j] = 0
            new_pos = perturb_position(pos, new_board)
            new_policy, new_V = network.run(new_pos)
            new_p = new_policy[best_move]

            # Search the perturbed position: building the player from the
            # unperturbed `pos` made new_Q independent of the removed
            # stone.
            player = get_mcts_player(network, new_pos)
            node = player.root
            new_Q = node.child_Q[best_move]

            atariV[i, j] = 0.5*((V - new_V)**2)
            atariP[i, j] = 0.5*np.linalg.norm(policy - new_policy)
            dP = p - new_p

            dQ = old_Q - new_Q
            K = cross_entropy(policy, new_policy, best_move)
            if dP>0:
                heatmap[i, j] = 2*dP/(1 + dP*K)

            if dQ>0:
                delQ[i, j] = dQ

    # Rescale both maps to [0, 1].
    # NOTE(review): a map whose entries are all equal divides by zero here.
    atariV = (atariV - np.min(atariV))/(np.max(atariV) - np.min(atariV))
    atariP = (atariP - np.min(atariP))/(np.max(atariP) - np.min(atariP))

    frame = np.zeros((N,N,3))
    frame = saliency_combine(atariV, frame, blur=256, channel=2)
    frame = saliency_combine(atariP, frame, blur=256, channel=0)

    plt.figure(1)
    plt.imshow(atariV, cmap = 'Reds')
    plt.colorbar()
    plt.savefig(save_path + 'atariV.png')
    plt.show()

    plt.figure(2)
    plt.imshow(atariP, cmap= 'Reds')
    plt.colorbar()
    plt.savefig(save_path + 'atariP.png')
    plt.show()

    plt.figure(3)
    plt.imshow(frame)
    plt.savefig(save_path + 'atari.png')
    plt.show()

    plt.figure(4)
    plt.imshow(delQ, cmap = 'Reds')
    plt.colorbar()
    plt.savefig(save_path + 'deltaQ.png')
    plt.show()

    plt.figure(5)
    plt.imshow(heatmap, cmap = 'Reds')
    plt.colorbar()
    plt.savefig(save_path + 'entropy.png')
    plt.show()
예제 #29
0
def eval_player(player, positions, moves, results):
  """Compare a player's policy and value outputs with moves and results.

  Returns a pair: a list of booleans telling, for each entry of moves,
  whether it equals the player's highest-probability move for that
  position, and the squared difference between the player's values and
  results, divided by 4.
  """
  probs, values = batch_run_many(player, positions)
  best_flat = np.argmax(probs, axis=1)
  policy_moves = [coords.from_flat(flat) for flat in best_flat]
  top_move_agree = [
      move == policy_moves[idx] for idx, move in enumerate(moves)]
  square_err = (values - results)**2/4
  return top_move_agree, square_err
예제 #30
0
def play(network, readouts, resign_threshold, verbosity=0):
    '''Play out a self-play match and return the MCTSPlayer that played it.

    Args:
        network: object whose run(position) returns (prob, val).
        readouts: number of additional readouts to run before each move.
        resign_threshold: passed to MCTSPlayer; replaced by -1.0 in 5% of
            games to disable resignation.
        verbosity: 1 prints progress on every tenth move, 2 prints it on
            every move plus a final summary on stderr, 3 also prints the
            position, the search statistics and the move played.

    Returns:
        The MCTSPlayer, with the game result set.'''
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            # The result is the negated to_play value of the resigning side.
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        if (verbosity >= 2) or (verbosity >= 1
                                and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                  (player.root.position.n, readouts, dur / readouts * 100.0,
                   dur),
                  flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position,
              player.root.position.score(),
              file=sys.stderr)

    return player
예제 #31
0
 def _from_flat(flat_coords):
   """Convert flat_coords with coords.from_flat at the test board size."""
   size = utils_test.BOARD_SIZE
   return coords.from_flat(size, flat_coords)
예제 #32
0
def play(network):
    '''Play one self-play match and return the MCTSPlayer that played it.

    The number of readouts per move comes from FLAGS.num_readouts. With
    probability FLAGS.resign_disable_pct the player is created with a
    resign threshold of -1.0, otherwise with None. FLAGS.verbose controls
    the progress output.'''
    readouts = FLAGS.num_readouts  # defined in strategies.py
    # A resign_disable_pct share of games gets the -1.0 threshold.
    resign_threshold = (
        -1.0 if random.random() < FLAGS.resign_disable_pct else None)

    player = MCTSPlayer(network, resign_threshold=resign_threshold)
    player.initialize_game()

    # Expand the root node once before the search loop starts.
    leaf = player.root.select_leaf()
    prob, val = network.run(leaf.position)
    leaf.incorporate_results(prob, val, leaf)

    while True:
        started = time.time()
        player.root.inject_noise()
        # Run `readouts` more readouts on top of those already done.
        target = player.root.N + readouts
        while player.root.N < target:
            player.tree_search()

        if FLAGS.verbose >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            resigning_side = player.root.position.to_play
            player.set_result(-1 * resigning_side, was_resign=True)
            break
        player.play_move(player.pick_move())
        if player.root.is_done():
            final_result = player.root.position.result()
            player.set_result(final_result, was_resign=False)
            break

        if FLAGS.verbose >= 2 or (
                FLAGS.verbose >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            elapsed = time.time() - started
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                  (player.root.position.n, readouts,
                   elapsed / readouts * 100.0, elapsed),
                  flush=True)
        if FLAGS.verbose >= 3:
            played = coords.to_kgs(coords.from_flat(player.root.fmove))
            print("Played >>", played)

    if FLAGS.verbose >= 2:
        utils.dbg("%s: %.3f" % (player.result_string, player.root.Q))
        utils.dbg(player.root.position, player.root.position.score())

    return player
예제 #33
0
def testAgainstRandom(model, matches):
    #untrained models do not play at random, they have random weight initializations and then alawys play in terms of those
    #this function takes a model (a trained one) and plays it against a player who makes a random move every time.
    #it plays matches number of matches
    veteran = model
    veteranWins = 0
    beginnerWins = 0
    white = None
    black = None
    for i in range(matches):
        if i % 2 == 0:
            black = veteran
        else:
            white = veteran
        position = go.Position()
        while not position.is_game_over():
            if position.n >= 100:
                position = position.pass_move()
            else:
                if position.to_play == 1:
                    if black == veteran:
                        boards, playerCaps, opponentCaps = gamesToData(
                            [[position, 1]])
                        actions = black.callPol(boards, playerCaps,
                                                opponentCaps)[0]
                        pdist = tf.nn.softmax(
                            tf.cast(actions, dtype=tf.float64))
                        legalMoves = position.all_legal_moves()
                        move = np.random.choice(np.arange(0, len(pdist)),
                                                p=pdist)
                        if legalMoves[move] == 0:
                            actions = actions * legalMoves
                            move = tf.math.argmax(actions).numpy()
                        position = position.play_move(coords.from_flat(move))
                    else:
                        position = choose_and_play_move(position)
                else:
                    if white == veteran:
                        boards, playerCaps, opponentCaps = gamesToData(
                            [[position, 1]])
                        actions = white.callPol(boards, playerCaps,
                                                opponentCaps)[0]
                        pdist = tf.nn.softmax(
                            tf.cast(actions, dtype=tf.float64))
                        legalMoves = position.all_legal_moves()
                        move = np.random.choice(np.arange(0, len(pdist)),
                                                p=pdist)
                        if legalMoves[move] == 0:
                            actions = actions * legalMoves
                            move = tf.math.argmax(actions).numpy()
                        position = position.play_move(coords.from_flat(move))
                    else:
                        position = choose_and_play_move(position)

        if black == veteran:
            if position.result() == 1:
                veteranWins += 1
            elif position.result() == -1:
                beginnerWins += 1
            else:
                print("No one wins!!")
        else:
            if position.result() == 1:
                beginnerWins += 1
            elif position.result() == -1:
                veteranWins += 1
            else:
                print("No one wins!!")
    print("The model wins " + str(veteranWins))
    print("The random wins " + str(beginnerWins))
    return veteranWins - beginnerWins
예제 #34
0
def play(board_size, network, readouts, resign_threshold, simultaneous_leaves,
         verbosity=0):
  """Plays out a self-play match.

  Args:
    board_size: the go board size
    network: the DualNet model
    readouts: the number of readouts in MCTS
    resign_threshold: the threshold to resign at in the match
    simultaneous_leaves: the number of simultaneous leaves in MCTS
    verbosity: the verbosity of the self-play match

  Returns:
    The MCTSPlayer that played the match, with the game result set.
  """
  player = MCTSPlayer(board_size, network, resign_threshold=resign_threshold,
                      verbosity=verbosity, num_parallel=simultaneous_leaves)
  # Disable resign in 5% of games
  if random.random() < 0.05:
    player.resign_threshold = -1.0

  player.initialize_game()

  # Must run this once at the start, so that noise injection actually
  # affects the first move of the game.
  first_node = player.root.select_leaf()
  prob, val = network.run(first_node.position)
  first_node.incorporate_results(prob, val, first_node)

  while True:
    start = time.time()
    player.root.inject_noise()
    current_readouts = player.root.N
    # we want to do "X additional readouts", rather than "up to X readouts".
    while player.root.N < current_readouts + readouts:
      player.tree_search()

    if verbosity >= 3:
      print(player.root.position)
      print(player.root.describe())

    if player.should_resign():
      player.set_result(-1 * player.root.position.to_play, was_resign=True)
      break
    move = player.pick_move()
    player.play_move(move)
    if player.root.is_done():
      player.set_result(player.root.position.result(), was_resign=False)
      break

    if (verbosity >= 2) or (
        verbosity >= 1 and player.root.position.n % 10 == 9):
      print("Q: {:.5f}".format(player.root.Q))
      dur = time.time() - start
      print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
          player.root.position.n, readouts, dur / readouts * 100.0, dur))
    if verbosity >= 3:
      # The board-size variant of coords takes the board size as its first
      # argument; it was missing from both calls here.
      print("Played >>",
            coords.to_kgs(
                board_size,
                coords.from_flat(board_size, player.root.fmove)))

  if verbosity >= 2:
    print("%s: %.3f" % (player.result_string, player.root.Q), file=sys.stderr)
    print(player.root.position,
          player.root.position.score(), file=sys.stderr)

  return player