Beispiel #1
0
 def test_corner(self):
     """A corner point enclosed by black stones is an eye for black, not for white."""
     board = Board(19, 19)
     # Black takes both neighbours and the one diagonal of the corner (1, 1).
     for row, col in ((1, 2), (2, 2), (2, 1)):
         board.place_stone(Player.black, Point(row, col))
     corner = Point(1, 1)
     self.assertTrue(is_point_an_eye(board, corner, Player.black))
     self.assertFalse(is_point_an_eye(board, corner, Player.white))
Beispiel #2
0
    def select_move(self, game_state):
        """Sample a move from the predicted move probabilities.

        Points are tried in a randomly sampled order weighted by the
        sharpened prediction. The first point that is a valid move and is not
        one of the next player's own eyes is played; otherwise the agent
        passes.
        """
        board_size = self.encoder.board_width * self.encoder.board_height
        raw_probs = self.predict(game_state)
        # end::dl_agent_predict[]

        # tag::dl_agent_probabilities[]
        tiny = 1e-6
        sharpened = raw_probs ** 3  # <1>
        bounded = np.clip(sharpened, tiny, 1 - tiny)  # <2>
        distribution = bounded / np.sum(bounded)  # <3>
        # <1> Increase the distance between the most likely and least likely moves.
        # <2> Prevent move probabilities from getting stuck at 0 or 1.
        # <3> Re-normalize to get another probability distribution.
        # end::dl_agent_probabilities[]

        # tag::dl_agent_candidates[]
        all_indices = np.arange(board_size)  # <1>
        sampling_order = np.random.choice(
            all_indices, board_size, replace=False, p=distribution)  # <2>
        for point_idx in sampling_order:
            point = self.encoder.decode_point_index(point_idx)
            if not game_state.is_valid_move(goboard.Move.play(point)):
                continue
            if is_point_an_eye(game_state.board, point, game_state.next_player):  # <3>
                continue
            return goboard.Move.play(point)
        return goboard.Move.pass_turn()  # <4>
Beispiel #3
0
    def select_move(self, game_state):
        """Sample a move from the policy model and report it to the collector.

        The encoded position is passed to ``self._model``; points are then
        tried in an order sampled from the clipped, re-normalized output. The
        first point that is a valid move and not one of the next player's own
        eyes is played. When ``self._collector`` is set, the encoded state and
        the chosen point index are recorded before returning. Returns a pass
        when no point qualifies.
        """
        encoder = self._encoder
        num_moves = encoder.board_width * encoder.board_height

        board_tensor = encoder.encode(game_state)
        batch = np.array([board_tensor])
        move_probs = self._model.predict(batch)[0]

        # Keep every probability inside [eps, 1 - eps], then rescale to sum to 1.
        eps = 1e-5
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)

        # A weighted sample without replacement gives a full ranking of points.
        ranking = np.random.choice(
            np.arange(num_moves), num_moves, replace=False, p=move_probs)
        for point_idx in ranking:
            point = encoder.decode_point_index(point_idx)
            if not game_state.is_valid_move(goboard.Move.play(point)):
                continue
            if is_point_an_eye(game_state.board, point,
                               game_state.next_player):
                continue
            if self._collector is not None:
                self._collector.record_decision(state=board_tensor,
                                                action=point_idx)
            return goboard.Move.play(point)
        # No valid move that avoids filling our own eye is left.
        return goboard.Move.pass_turn()
Beispiel #4
0
    def select_move(self, game_state):
        """Choose a move by scoring every legal play with the two-input model.

        Each legal play is paired with the same encoded board and a one-hot
        move vector. The model's outputs are ranked by
        ``self.ranked_moves_eps_greedy`` and the first ranked move that does
        not fill one of the next player's own eyes is played (and recorded
        with the collector, when one is set). Returns a pass if there is no
        legal play or every play fills an own eye.
        """
        board_tensor = self._encoder.encode(game_state)

        # Encoded point index of every legal play (passes etc. are skipped).
        moves = [self._encoder.encode_point(legal.point)
                 for legal in game_state.legal_moves()
                 if legal.is_play]
        if not moves:
            return goboard.Move.pass_turn()

        num_moves = len(moves)
        # The same board is repeated once per candidate move.
        board_tensors = np.array([board_tensor] * num_moves)
        move_vectors = np.zeros((num_moves, self._encoder.num_points()))
        for one_hot, encoded_point in zip(move_vectors, moves):
            one_hot[encoded_point] = 1

        values = self._model.predict([board_tensors, move_vectors])
        values = values.reshape(num_moves)

        for move_idx in self.ranked_moves_eps_greedy(values):
            encoded_point = moves[move_idx]
            point = self._encoder.decode_point_index(encoded_point)
            if is_point_an_eye(game_state.board, point,
                               game_state.next_player):
                continue
            if self._collector is not None:
                self._collector.record_decision(state=board_tensor,
                                                action=encoded_point)
            return goboard.Move.play(point)
        return goboard.Move.pass_turn()
Beispiel #5
0
    def select_move(self, game_state):
        """Sample a move from the predicted move probabilities.

        Returns a play on the first sampled point that is a valid move and
        not one of the next player's own eyes, or a pass when none exists.
        """
        point_count = self.encoder.board_width * self.encoder.board_height

        # Cubing widens the gap between likely and unlikely moves.
        probs = self.predict(game_state) ** 3

        # Clip into [eps, 1 - eps] and rescale into a probability distribution.
        eps = 1e-6
        probs = np.clip(probs, eps, 1 - eps)
        probs = probs / np.sum(probs)

        # Drawing every point index without replacement according to the
        # distribution yields a ranking that follows the probabilities.
        ranking = np.random.choice(np.arange(point_count),
                                   point_count,
                                   replace=False,
                                   p=probs)

        # Walk the ranking from the top and play the first acceptable point.
        for point_idx in ranking:
            point = self.encoder.decode_point_index(point_idx)
            if game_state.is_valid_move(goboard.Move.play(point)) and \
                    not is_point_an_eye(game_state.board, point,
                                        game_state.next_player):
                return goboard.Move.play(point)

        # Nothing acceptable is left, so pass.
        return goboard.Move.pass_turn()
Beispiel #6
0
    def select_move(self, game_state):
        """Sample a move from the two-output model and record its value estimate.

        The model returns move probabilities and a value for the encoded
        position. Points are tried in an order sampled from the clipped,
        re-normalized probabilities; the first valid point that is not one of
        the next player's own eyes is played. When ``self._collector`` is set,
        the state, chosen index and estimated value are recorded. Returns a
        pass when no point qualifies.
        """
        num_moves = self._encoder.board_width * self._encoder.board_height

        board_tensor = self._encoder.encode(game_state)
        batch = np.array([board_tensor])

        actions, values = self._model.predict(batch)
        # Single-board batch: take the first row of each output.
        estimated_value = values[0][0]

        eps = 1e-6
        clipped = np.clip(actions[0], eps, 1 - eps)
        move_probs = clipped / np.sum(clipped)

        ranking = np.random.choice(
            np.arange(num_moves), num_moves, replace=False, p=move_probs)
        for point_idx in ranking:
            point = self._encoder.decode_point_index(point_idx)
            move = goboard.Move.play(point)
            move_is_valid = game_state.is_valid_move(move)
            fills_own_eye = is_point_an_eye(game_state.board, point,
                                            game_state.next_player)
            if fills_own_eye or not move_is_valid:
                continue
            if self._collector is not None:
                self._collector.record_decision(
                    state=board_tensor,
                    action=point_idx,
                    estimated_value=estimated_value)
            return goboard.Move.play(point)
        return goboard.Move.pass_turn()
Beispiel #7
0
    def select_move(self, game_state):
        """Sample a move from the predicted move probabilities.

        Plays the first sampled point that is a valid move and not one of the
        next player's own eyes; passes when no such point exists.
        """
        total_points = self.encoder.board_width * self.encoder.board_height

        # Cube the predictions, clip them into [eps, 1 - eps] and normalize.
        eps = 1e-6
        weights = np.clip(self.predict(game_state) ** 3, eps, 1 - eps)
        weights = weights / np.sum(weights)

        def acceptable(point):
            # The eye test only runs when the move itself is valid.
            if not game_state.is_valid_move(goboard.Move.play(point)):
                return False
            return not is_point_an_eye(
                game_state.board, point, game_state.next_player)

        # Weighted sample without replacement = ranked list of point indices.
        ranked = np.random.choice(
            np.arange(total_points), total_points, replace=False, p=weights
        )
        for point_idx in ranked:
            point = self.encoder.decode_point_index(point_idx)
            if acceptable(point):
                return goboard.Move.play(point)

        return goboard.Move.pass_turn()
Beispiel #8
0
    def select_move(self, game_state):
        """Play a random valid move that does not fill one of our own eyes.

        Apart from keeping its eyes and avoiding invalid moves, the choice is
        purely random. Passes when no such point exists.
        """
        board = game_state.board
        # A point qualifies when the move is valid and the point is not one
        # of the next player's eyes; the eye test is skipped for invalid moves.
        candidates = [
            point
            for point in (
                Point(row=r, col=c)
                for r in range(1, board.num_rows + 1)
                for c in range(1, board.num_cols + 1)
            )
            if game_state.is_valid_move(Move.play(point))
            and not is_point_an_eye(board, point, game_state.next_player)
        ]

        # Nowhere to play: pass.
        if not candidates:
            return Move.pass_turn()

        return Move.play(random.choice(candidates))
Beispiel #9
0
 def select_move(self, game_state):
     """Pick at random among valid moves that do not fill the mover's own eye.

     Returns a pass when no such point exists.
     """
     board = game_state.board

     def playable(point):
         # Validity is checked first; the eye test only runs for valid moves.
         return (game_state.is_valid_move(Move.play(point))
                 and not is_point_an_eye(board, point, game_state.next_player))

     # Every board point in row-major order, using 1-based coordinates.
     points = [Point(row=r, col=c)
               for r in range(1, board.num_rows + 1)
               for c in range(1, board.num_cols + 1)]
     candidates = [p for p in points if playable(p)]
     if candidates:
         return Move.play(random.choice(candidates))
     return Move.pass_turn()
Beispiel #10
0
 def test_middle(self):
     """A centre point counts as a black eye even with one white diagonal.

     Black holds all four neighbours of (3, 3) and three of its diagonals;
     the remaining diagonal (4, 4) is white.
     """
     board = Board(19, 19)
     black, white = Player.black, Player.white
     stones = [
         (black, (2, 2)),
         (black, (3, 2)),
         (black, (4, 2)),
         (black, (4, 3)),
         (white, (4, 4)),
         (black, (3, 4)),
         (black, (2, 4)),
         (black, (2, 3)),
     ]
     for colour, (row, col) in stones:
         board.place_stone(colour, Point(row, col))
     self.assertTrue(is_point_an_eye(board, Point(3, 3), black))
Beispiel #11
0
    def select_move(self, game_state):  # 11.6
        """Choose a move by scoring every legal play with the two-input model.

        Each legal play is paired with the encoded board and a one-hot move
        vector; the model's values are ranked according to ``self.policy``
        (``'eps-greedy'`` or ``'weighted'``). The first ranked move that does
        not fill one of the next player's own eyes is played and, when a
        collector is set, recorded.

        Returns:
            A play move, or a pass when there is no legal play or every legal
            play fills an own eye.

        Raises:
            ValueError: if ``self.policy`` is not a supported policy name.
        """
        board_tensor = self.encoder.encode(game_state)

        # Collect the encoded point of every legal play.
        moves = []
        board_tensors = []
        for move in game_state.legal_moves():
            if not move.is_play:
                continue
            moves.append(self.encoder.encode_point(move.point))
            board_tensors.append(board_tensor)
        # If there are no valid moves left, the agent can just pass.
        if not moves:
            return goboard.Move.pass_turn()

        num_moves = len(moves)
        board_tensors = np.array(board_tensors)
        # One-hot encode each candidate move.
        move_vectors = np.zeros((num_moves, self.encoder.num_points()))
        for i, move in enumerate(moves):
            move_vectors[i][move] = 1

        # Two-input form of predict: the inputs are passed as a list.
        values = self.model.predict([board_tensors, move_vectors])

        # Flatten the model output to one value per candidate move.
        values = values.reshape(num_moves)

        if self.policy == 'eps-greedy':
            ranked_moves = self.rank_moves_eps_greedy(values)
        elif self.policy == 'weighted':
            ranked_moves = self.rank_moves_weighted(values)
        else:
            # Previously this fell through with ranked_moves = None and failed
            # below with an opaque "'NoneType' object is not iterable".
            raise ValueError(
                "Unknown policy {!r}; expected 'eps-greedy' or "
                "'weighted'".format(self.policy))

        # Pick the first move in the ranking that does not fill our own eye.
        for move_idx in ranked_moves:
            point = self.encoder.decode_point_index(moves[move_idx])
            if not is_point_an_eye(game_state.board, point,
                                   game_state.next_player):
                # Record the decision in the experience collector, if any.
                if self.collector is not None:
                    self.collector.record_decision(
                        state=board_tensor,
                        action=moves[move_idx],
                    )
                return goboard.Move.play(point)
        # Every legal play would fill one of our own eyes.
        return goboard.Move.pass_turn()
Beispiel #12
0
 def select_move(self, game_state):
     """Return a random valid move that leaves our own eyes open, else pass."""
     board = game_state.board
     candidates = []
     for r in range(1, board.num_rows + 1):
         for c in range(1, board.num_cols + 1):
             point = Point(row=r, col=c)
             # Skip invalid moves before running the eye test.
             if not game_state.is_valid_move(Move.play(point)):
                 continue
             if is_point_an_eye(board, point, game_state.next_player):
                 continue
             candidates.append(point)
     if candidates:
         return Move.play(random.choice(candidates))
     return Move.pass_turn()
Beispiel #13
0
 def select_move(self, game_state):
   """Choose a random valid move that preserves our own eyes; pass if none exists."""
   board = game_state.board
   candidates = []  # every point that is valid to play and is not our own eye
   for r in range(1, board.num_rows + 1):
     for c in range(1, board.num_cols + 1):
       point = Point(row=r, col=c)
       # Reject invalid moves first; the eye test only runs for valid ones.
       if not game_state.is_valid_move(Move.play(point)) or \
           is_point_an_eye(board, point, game_state.next_player):
         continue
       candidates.append(point)
   if not candidates:
     return Move.pass_turn()  # nothing acceptable is left, so pass
   return Move.play(random.choice(candidates))  # random pick among candidates
Beispiel #14
0
 def __init__(self, game_state: GameState, parent: MCTSNode = None, last_move: Move = None):
     """Create a tree node for ``game_state``.

     Stores the state, the parent node and the move that led here,
     initialises per-player win counts and the rollout counter to zero, and
     prepares the list of moves not yet visited: every legal move except
     plays that would fill one of the next player's own eyes.
     """
     self.game_state = game_state
     self.parent = parent
     self.last_move = last_move
     # Both players start with zero recorded wins.
     self.win_counts: Dict[Player, int] = dict.fromkeys(
         (Player.black, Player.white), 0)
     self.num_rollouts: int = 0
     self.children: List[MCTSNode] = []

     board: Board = game_state.board
     mover: Player = game_state.next_player
     pending: List[Move] = []
     for move in game_state.legal_moves():
         # Moves without a point are kept; plays are kept unless they fill
         # the mover's own eye.
         if move.point and is_point_an_eye(board, move.point, mover):
             continue
         pending.append(move)
     self.unvisited_moves: List[Move] = pending
 def select_move(self, game_state):
     """Play the highest-scoring acceptable point according to the model.

     The encoded state is reshaped to ``(-1, 1, 9, 9)`` and scored by
     ``self.model``; positions are tried from highest to lowest score. The
     first point that is a valid move and not one of the next player's own
     eyes is played, otherwise the agent passes. The 9x9 board size is
     hard-coded here.
     """
     encoded = self.encode_game_state(game_state)
     batch = torch.from_numpy(encoded).to(device).view(-1, 1, 9, 9)
     output = self.model(batch)
     scores = output[0].cpu().data.numpy()
     # Position indices ordered by descending score (ties keep index order).
     order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
     for pos in order:
         row, col = divmod(pos, 9)
         point = Point(row + 1, col + 1)  # board coordinates are 1-based
         if not game_state.is_valid_move(goboard.Move.play(point)):
             continue
         if is_point_an_eye(game_state.board, point, game_state.next_player):
             continue
         return goboard.Move.play(point)
     return goboard.Move.pass_turn()
Beispiel #16
0
 def select_move(self, game_state):
     """Choose a random valid move that preserves our own eyes.

     Passes when no such move exists.
     """
     board = game_state.board

     def board_points():
         # Yield every point row by row, using 1-based coordinates.
         for r in range(1, board.num_rows + 1):
             for c in range(1, board.num_cols + 1):
                 yield Point(row=r, col=c)

     # Keep points that are valid to play and are not the mover's own eye.
     candidates = [
         point for point in board_points()
         if game_state.is_valid_move(Move.play(point))
         and not is_point_an_eye(board, point, game_state.next_player)
     ]
     if not candidates:
         return Move.pass_turn()
     return Move.play(random.choice(candidates))
Beispiel #17
0
    def select_move(self, game_state):
        """Return the first acceptable point from a shuffled pass over the cache.

        ``self._update_cache`` is called when the board dimensions differ
        from ``self.dim`` (presumably it rebuilds ``self.point_cache`` for
        the new size; verify against its definition). Points are then
        visited in random order and the first one that is a valid move and
        not one of the next player's own eyes is played. Passes when none
        qualifies.
        """
        board = game_state.board
        board_shape = (board.num_rows, board.num_cols)
        if board_shape != self.dim:
            self._update_cache(board_shape)

        order = np.arange(len(self.point_cache))
        np.random.shuffle(order)
        for position in order:
            point = self.point_cache[position]
            if not game_state.is_valid_move(Move.play(point)):
                continue
            if is_point_an_eye(board, point, game_state.next_player):
                continue
            return Move.play(point)
        return Move.pass_turn()
Beispiel #18
0
    def select_move(self, game_state):
        """Choose one random valid move that does not fill our own eye.

        The cache update is triggered when the board dimensions differ from
        ``self.dim``. Cached points are visited in shuffled order; the first
        acceptable one is played, and the agent passes if there is none.
        """
        current_dim = (game_state.board.num_rows, game_state.board.num_cols)
        if current_dim != self.dim:
            self._update_cache(current_dim)

        shuffled = np.arange(len(self.point_cache))
        np.random.shuffle(shuffled)
        for cache_idx in shuffled:
            candidate = self.point_cache[cache_idx]
            is_acceptable = (
                game_state.is_valid_move(Move.play(candidate))
                and not is_point_an_eye(game_state.board,
                                        candidate,
                                        game_state.next_player)
            )
            if is_acceptable:
                return Move.play(candidate)
        return Move.pass_turn()
Beispiel #19
0
    def select_move(self, game_state):
        """Play a random valid move that preserves our eyes, or pass."""
        board = game_state.board
        rows = range(1, board.num_rows + 1)
        # A candidate must be a valid move and must not be one of the next
        # player's own eyes.
        candidates = [
            Point(row=r, col=c)
            for r in rows
            for c in range(1, board.num_cols + 1)
            if game_state.is_valid_move(Move.play(Point(row=r, col=c)))
            and not is_point_an_eye(board, Point(row=r, col=c),
                                    game_state.next_player)
        ]

        # Pass when nothing qualifies, otherwise pick a candidate at random.
        return (Move.play(random.choice(candidates))
                if candidates else Move.pass_turn())
Beispiel #20
0
    def select_move(self, game_state):
        """Choose a move by evaluating the position after each legal play.

        Every legal play is applied to ``game_state`` and the resulting state
        is encoded and scored by the model. The model output is treated as
        the value from the opponent's point of view, so ``1 - value`` is used
        as our own value. Moves are ranked according to ``self.policy``
        (``'eps-greedy'`` or ``'weighted'``) and the first one that does not
        fill one of the next player's own eyes is returned. Its value is
        stored in ``self.last_move_value``.

        Returns:
            The selected move, or a pass when there is no legal play or every
            legal play fills an own eye.

        Raises:
            ValueError: if ``self.policy`` is not a supported policy name.
        """
        # Loop over all legal moves.
        moves = []
        board_tensors = []
        board_tensor = None
        for move in game_state.legal_moves():
            if not move.is_play:
                continue
            next_state = game_state.apply_move(move)
            board_tensor = self.encoder.encode(next_state)
            moves.append(move)
            board_tensors.append(board_tensor)
        if not moves:
            return goboard.Move.pass_turn()

        board_tensors = np.array(board_tensors)

        # Values of the next state from opponent's view.
        opp_values = self.model.predict(board_tensors)
        opp_values = opp_values.reshape(len(moves))

        # Values from our point of view.
        values = 1 - opp_values

        if self.policy == 'eps-greedy':
            ranked_moves = self.rank_moves_eps_greedy(values)
        elif self.policy == 'weighted':
            ranked_moves = self.rank_moves_weighted(values)
        else:
            # Previously this fell through with ranked_moves = None and failed
            # below with an opaque "'NoneType' object is not iterable".
            raise ValueError(
                "Unknown policy {!r}; expected 'eps-greedy' or "
                "'weighted'".format(self.policy))

        for move_idx in ranked_moves:
            move = moves[move_idx]
            if not is_point_an_eye(game_state.board, move.point,
                                   game_state.next_player):
                if self.collector is not None:
                    # NOTE(review): board_tensor here is whatever the loop
                    # above encoded last, not a tensor tied to the chosen
                    # move. Confirm which state training expects before
                    # changing it.
                    self.collector.record_decision(
                        state=board_tensor,
                        action=self.encoder.encode_point(move.point),
                    )
                self.last_move_value = float(values[move_idx])
                return move
        # No legal, non-self-destructive moves left.
        return goboard.Move.pass_turn()
Beispiel #21
0
    def select_move(self, game_state, nmoves=1):
        """Sample up to ``nmoves`` distinct moves from the policy.

        With probability ``self._temperature`` a uniform distribution is used
        instead of the model output. Points are tried in an order sampled
        from the clipped, re-normalized probabilities; each point that is a
        valid move and not one of the next player's own eyes is collected
        (and recorded with the collector, when one is set) until ``nmoves``
        moves have been found. Missing slots are filled with pass moves.

        Args:
            game_state: the position to move in.
            nmoves: number of moves to return.

        Returns:
            A list of exactly ``nmoves`` moves; empty when ``nmoves <= 0``.
        """
        # Without this guard the length check in the loop never matched for
        # nmoves <= 0, so every valid move was returned and recorded.
        if nmoves <= 0:
            return []

        num_moves = self._encoder.board_width * self._encoder.board_height

        board_tensor = self._encoder.encode(game_state)
        x = np.array([board_tensor])

        if np.random.random() < self._temperature:
            # Explore random moves.
            move_probs = np.ones(num_moves) / num_moves
        else:
            # Follow our current policy.
            move_probs = self._model.predict(x)[0]

        # Prevent move probs from getting stuck at 0 or 1.
        eps = 1e-5
        move_probs = np.clip(move_probs, eps, 1 - eps)
        # Re-normalize to get another probability distribution.
        move_probs = move_probs / np.sum(move_probs)

        # Turn the probabilities into a ranked list of moves.
        candidates = np.arange(num_moves)
        ranked_moves = np.random.choice(candidates,
                                        num_moves,
                                        replace=False,
                                        p=move_probs)

        moves = []
        for point_idx in ranked_moves:
            point = self._encoder.decode_point_index(point_idx)
            if game_state.is_valid_move(goboard.Move.play(point)) and \
                    not is_point_an_eye(game_state.board,
                                        point,
                                        game_state.next_player):
                if self._collector is not None:
                    self._collector.record_decision(state=board_tensor,
                                                    action=point_idx)
                moves.append(goboard.Move.play(point))
                if len(moves) == nmoves:
                    break
        # Not enough legal, non-self-destructive moves: pad with passes.
        moves.extend(
            goboard.Move.pass_turn() for _ in range(nmoves - len(moves)))

        return moves
Beispiel #22
0
    def my_select_move(self, game_state, board_ext=None):
        """Pick the legal move that yields the best board score.

        Candidate points are sampled from the sharpened, re-normalized
        prediction. Every candidate that is a valid move and not one of the
        next player's own eyes is applied to a copy of the state and scored
        through ``gr``; the candidate with the highest score above zero wins.

        Args:
            game_state: the position to move in.
            board_ext: passed through unchanged to ``self.predict``.

        Returns:
            A ``(move, score)`` tuple. ``move`` is a pass (with score 0) when
            there is no acceptable candidate. When no candidate scores above
            zero, the highest-ranked acceptable candidate is played with
            score 0.
        """
        num_moves = self.encoder.board_width * self.encoder.board_height
        move_probs = self.predict(game_state, board_ext)
        # end::dl_agent_predict[]

        # tag::dl_agent_probabilities[]
        move_probs = move_probs**3  # <1>
        eps = 1e-6
        move_probs = np.clip(move_probs, eps, 1 - eps)  # <2>
        move_probs = move_probs / np.sum(move_probs)  # <3>
        # <1> Increase the distance between the most likely and least likely moves.
        # <2> Prevent move probs from getting stuck at 0 or 1
        # <3> Re-normalize to get another probability distribution.
        # end::dl_agent_probabilities[]

        # tag::dl_agent_candidates[]
        candidates = np.arange(num_moves)  # <1>
        ranked_moves = np.random.choice(candidates,
                                        num_moves,
                                        replace=False,
                                        p=move_probs)  # <2>

        possible_point = []  # All acceptable points proposed by the network, in ranked order.
        wait_score = 0  # Best board score seen so far.  # <5>
        for point_idx in ranked_moves:
            point = self.encoder.decode_point_index(point_idx)
            if game_state.is_valid_move(goboard.Move.play(point)) and \
                    not is_point_an_eye(game_state.board, point, game_state.next_player):  # <3>
                possible_point.append(point)
        if not possible_point:  # No acceptable moves, so pass.
            return goboard.Move.pass_turn(), wait_score  # <4>

        # Default to the highest-ranked acceptable point. Previously, when no
        # candidate scored above zero, the last point decoded in the loop
        # above was played even if it was illegal or an own eye.
        best_point = possible_point[0]
        # Among all acceptable moves, choose the one with the best board score.
        for p in possible_point:
            game_state_copy = copy.deepcopy(game_state)
            next_move = goboard.Move.play(p)
            game_state_copy = game_state_copy.apply_move(next_move)
            # Drop the leading B or W and keep the signed number.
            # NOTE(review): this discards which colour is ahead - confirm
            # that is intended.
            res = float(str(gr(game_state_copy)[0])[1:])
            if res > wait_score:
                wait_score = res
                best_point = p

        return goboard.Move.play(best_point), wait_score
Beispiel #23
0
    def select_move(self, game_state):  # 9.12 and 9.17
        """Sample a move from the policy, exploring at random with some probability.

        With probability ``self._temperature`` a uniform distribution over
        all points is used; otherwise the model's output for the encoded
        position is used. Points are tried in an order sampled from the
        clipped, re-normalized probabilities. The first point that is a valid
        move and not one of the next player's own eyes is played and, when a
        collector is set, recorded. Returns a pass when no point qualifies.
        """
        board_tensor = self._encoder.encode(game_state)
        # predict works on batches, so wrap the single board in an array.
        batch = np.array([board_tensor])
        num_moves = self._encoder.board_width * self._encoder.board_height

        explore = np.random.random() < self._temperature
        if explore:
            # Explore: every point is equally likely.
            move_probs = np.ones(num_moves) / num_moves
        else:
            # Exploit: follow the current policy (first row of the batch).
            move_probs = self._model.predict(batch)[0]

        eps = 1e-5
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)

        # Sampling every point index without replacement gives a ranked list
        # of points to try.
        ranked_moves = np.random.choice(np.arange(num_moves),
                                        num_moves,
                                        replace=False,
                                        p=move_probs)

        for point_idx in ranked_moves:
            point = self._encoder.decode_point_index(point_idx)
            move = goboard.Move.play(point)
            is_valid = game_state.is_valid_move(move)
            is_an_eye = is_point_an_eye(game_state.board, point,
                                        game_state.next_player)
            if is_an_eye or not is_valid:
                continue
            # Notify the collector of the decision at the time it is made.
            if self._collector is not None:
                self._collector.record_decision(state=board_tensor,
                                                action=point_idx)
            return goboard.Move.play(point)

        # Falling through means there are no reasonable moves left.
        return goboard.Move.pass_turn()
Beispiel #24
0
    def select_move(self, game_state):
        """Choose a move by scoring every legal play with the two-input model.

        Each legal play is paired with the encoded board and a one-hot move
        vector; the model's values are ranked according to ``self.policy``
        (``'eps-greedy'`` or ``'weighted'``). The first ranked move that does
        not fill one of the next player's own eyes is played, recorded with
        the collector when one is set, and its value is stored in
        ``self.last_move_value``.

        Returns:
            A play move, or a pass when there is no legal play or every legal
            play fills an own eye.

        Raises:
            ValueError: if ``self.policy`` is not a supported policy name.
        """
        board_tensor = self.encoder.encode(game_state)

        # Loop over all legal moves.
        moves = []
        board_tensors = []
        for move in game_state.legal_moves():
            if not move.is_play:
                continue
            moves.append(self.encoder.encode_point(move.point))
            board_tensors.append(board_tensor)
        if not moves:
            return goboard.Move.pass_turn()

        num_moves = len(moves)
        board_tensors = np.array(board_tensors)
        # One-hot encode each candidate move.
        move_vectors = np.zeros((num_moves, self.encoder.num_points()))
        for i, move in enumerate(moves):
            move_vectors[i][move] = 1

        values = self.model.predict([board_tensors, move_vectors])
        values = values.reshape(num_moves)

        if self.policy == 'eps-greedy':
            ranked_moves = self.rank_moves_eps_greedy(values)
        elif self.policy == 'weighted':
            ranked_moves = self.rank_moves_weighted(values)
        else:
            # Previously this fell through with ranked_moves = None and failed
            # below with an opaque "'NoneType' object is not iterable".
            raise ValueError(
                "Unknown policy {!r}; expected 'eps-greedy' or "
                "'weighted'".format(self.policy))

        for move_idx in ranked_moves:
            point = self.encoder.decode_point_index(moves[move_idx])
            if not is_point_an_eye(game_state.board,
                                   point,
                                   game_state.next_player):
                if self.collector is not None:
                    self.collector.record_decision(
                        state=board_tensor,
                        action=moves[move_idx],
                    )
                self.last_move_value = float(values[move_idx])
                return goboard.Move.play(point)
        # No legal, non-self-destructive moves left.
        return goboard.Move.pass_turn()
Beispiel #25
0
 def select_move(self, game_state):
     """Choose a random valid move that preserves our own eyes, or pass."""
     board = game_state.board

     def is_candidate(point):
         # A candidate is a valid move that is not the next player's own eye;
         # the eye test only runs when the move is valid.
         if not game_state.is_valid_move(Move.play(point)):
             return False
         return not is_point_an_eye(board, point, game_state.next_player)

     # Every point on the board, row by row, with 1-based coordinates.
     every_point = (Point(row = r, col = c)
                    for r in range(1, board.num_rows + 1)
                    for c in range(1, board.num_cols + 1))
     candidates = list(filter(is_candidate, every_point))
     # Pass when nothing qualifies, otherwise choose among the candidates.
     if not candidates:
         return Move.pass_turn()
     return Move.play(random.choice(candidates))
Beispiel #26
0
    def select_move(self, game_state):
        """Sample a move from the two-output model and record its value estimate.

        The model returns move probabilities and a value for the encoded
        position. Points are tried in an order sampled from the clipped,
        re-normalized probabilities; the first valid point that is not one of
        the next player's own eyes is played. When ``self.collector`` is set,
        the state, chosen index and estimated value are recorded. Returns a
        pass when no point qualifies.
        """
        encoder = self.encoder
        num_moves = encoder.board_width * encoder.board_height

        board_tensor = encoder.encode(game_state)
        x = np.array([board_tensor])

        # Two-output model: predict returns the action and value arrays.
        actions, values = self.model.predict(x)

        # predict is a batch call; the single board is the first entry of each
        # output, and the value is pulled out of its own one-element row.
        move_probs = actions[0]
        estimated_value = values[0][0]

        eps = 1e-6
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)

        ranked_moves = np.random.choice(np.arange(num_moves),
                                        num_moves,
                                        replace=False,
                                        p=move_probs)
        for point_idx in ranked_moves:
            point = encoder.decode_point_index(point_idx)
            move = goboard.Move.play(point)
            move_is_valid = game_state.is_valid_move(move)
            fills_own_eye = is_point_an_eye(game_state.board, point,
                                            game_state.next_player)
            acceptable = move_is_valid and not fills_own_eye
            if not acceptable:
                continue
            if self.collector is not None:
                # The estimated value goes into the experience buffer too.
                self.collector.record_decision(
                    state=board_tensor,
                    action=point_idx,
                    estimated_value=estimated_value)
            return goboard.Move.play(point)
        return goboard.Move.pass_turn()
Beispiel #27
0
    def select_move(self, game_state):
        """Sample a move from the predicted move probabilities.

        Plays the first sampled point that is a valid move and not one of the
        next player's own eyes; passes when no such point exists.
        """
        num_moves = self.encoder.board_width * self.encoder.board_height
        move_probs = self.predict(game_state)

        # Cubing widens the gap between the more likely and less likely moves.
        move_probs = move_probs ** 3
        # Clip away exact 0 and 1, then renormalize into a distribution.
        eps = 1e-6
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs = move_probs / np.sum(move_probs)

        # A weighted sample without replacement ranks all point indices.
        ranked_moves = np.random.choice(
            np.arange(num_moves), num_moves, replace=False, p=move_probs)

        # Lazily decode and test points in ranked order; stop at the first hit.
        nothing = object()
        acceptable_points = (
            point
            for point in map(self.encoder.decode_point_index, ranked_moves)
            if game_state.is_valid_move(goboard.Move.play(point))
            and not is_point_an_eye(
                game_state.board, point, game_state.next_player)
        )
        chosen = next(acceptable_points, nothing)
        if chosen is nothing:
            return goboard.Move.pass_turn()
        return goboard.Move.play(chosen)
Beispiel #28
0
    def select_ranked_move(self, game_state, move_width=3, board_ext=None):
        """Return the playable moves among the top-rated board points.

        Args:
            game_state: Position to pick moves for; forwarded to
                ``self.predict`` and queried for move validity.
            move_width: Number of highest-probability points to consider.
            board_ext: Forwarded unchanged to ``self.predict``.

        Returns:
            A list of ``goboard.Move`` play moves ordered from highest to
            lowest predicted probability. It contains only those of the
            ``move_width`` best points that are valid moves and are not
            reported as an eye of the player to move by
            ``is_point_an_eye``. The list may be empty.
        """
        move_probs = list(self.predict(game_state, board_ext))

        # Rank the *indices* by probability. Looking indices up by value
        # (``list.index``) returns the first match only, so tied
        # probabilities produced the same point several times and hid the
        # other tied points. ``sorted`` is stable, so ties keep board order.
        ranked_moves = sorted(range(len(move_probs)),
                              key=move_probs.__getitem__,
                              reverse=True)[:move_width]

        possible_moves = []
        for point_idx in ranked_moves:
            point = self.encoder.decode_point_index(point_idx)
            move = goboard.Move.play(point)
            if not game_state.is_valid_move(move):
                continue
            if is_point_an_eye(game_state.board, point,
                               game_state.next_player):
                continue
            possible_moves.append(move)

        return possible_moves
Beispiel #29
0
    def encode(self, game_state):
        """Encode ``game_state`` as a stack of binary feature planes.

        Args:
            game_state: Position to encode. Its board, player to move and
                move-validity checks are all consulted.

        Returns:
            An array of shape ``(self.num_planes, self.board_height,
            self.board_width)``. ``offset(name)`` gives the first plane of
            each named feature. Stone colour, eye and capture features are
            relative to ``game_state.next_player``.
        """
        board_tensor = np.zeros((self.num_planes, self.board_height, self.board_width))
        board = game_state.board
        next_player = game_state.next_player
        opponent = next_player.other

        # The constant planes do not depend on the point, so fill them once
        # instead of once per board point.
        board_tensor[offset('ones')] = self.ones()
        board_tensor[offset('zeros')] = self.zeros()

        # NOTE(review): several features below index planes at
        # ``offset(name) + k`` with k up to 8 -- confirm each feature
        # reserves a plane at ``offset + 8`` and does not spill into the
        # next feature.
        for r in range(self.board_height):
            for c in range(self.board_width):
                # Board points are 1-based, tensor coordinates 0-based.
                point = Point(row=r + 1, col=c + 1)
                point_string = board.get_go_string(point)

                # Stone colour: own stone / opponent stone / anything else.
                if point_string and point_string.color == next_player:
                    board_tensor[offset('stone_color')][r][c] = 1
                elif point_string and point_string.color == opponent:
                    board_tensor[offset('stone_color') + 1][r][c] = 1
                else:
                    board_tensor[offset('stone_color') + 2][r][c] = 1

                if not is_point_an_eye(board, point, next_player):
                    board_tensor[offset('sensibleness')][r][c] = 1

                # Array indices must be integers; ``move_ages.get`` may hand
                # back a float (its implementation is not visible here).
                ages = int(min(board.move_ages.get(r, c), 8))
                if ages > 0:
                    board_tensor[offset('turns_since') + ages][r][c] = 1

                if point_string:
                    liberties = min(point_string.num_liberties, 8)
                    board_tensor[offset('liberties') + liberties][r][c] = 1

                move = Move(point)
                if game_state.is_valid_move(move):
                    # Liberties of the string containing the new stone.
                    new_state = game_state.apply_move(move)
                    liberties = min(new_state.board.get_go_string(point).num_liberties, 8)
                    board_tensor[offset('liberties_after') + liberties][r][c] = 1

                    # Opponent strings next to the point that are down to
                    # their last liberty.
                    # NOTE(review): a string touching the point on two sides
                    # is counted once per touching neighbour.
                    capture_count = 0
                    for neighbor in point.neighbors():
                        neighbor_string = board.get_go_string(neighbor)
                        if (neighbor_string
                                and neighbor_string.num_liberties == 1
                                and neighbor_string.color == opponent):
                            capture_count += len(neighbor_string.stones)
                    capture_count = min(capture_count, 8)
                    board_tensor[offset('capture_size') + capture_count][r][c] = 1

                # Uses the string on the point itself; the capture loop above
                # has its own variable, so this check no longer reads a
                # value left over from that loop.
                if point_string and point_string.num_liberties == 1:
                    num_atari_stones = min(len(point_string.stones), 8)
                    board_tensor[offset('self_atari_size') + num_atari_stones][r][c] = 1

                if is_ladder_capture(game_state, point):
                    board_tensor[offset('ladder_capture')][r][c] = 1

                if is_ladder_escape(game_state, point):
                    board_tensor[offset('ladder_escape')][r][c] = 1

        if self.use_player_plane:
            # NOTE(review): this rewrites the constant planes with the values
            # they already hold; a dedicated player-colour plane was probably
            # intended -- confirm against the plane layout.
            if next_player == Player.black:
                board_tensor[offset('ones')] = self.ones()
            else:
                board_tensor[offset('zeros')] = self.zeros()

        return board_tensor