Beispiel #1
0
 def __init__(self, s_model=None, f_model=None):
     """Create a player with a fresh board and its two models.

     Args:
         s_model: Object stored as ``_strong_model``.  When omitted, the
             module-level ``s_base_model`` is used instead.
         f_model: Object stored as ``_fast_model``.  When omitted, the
             module-level ``f_base_model`` is used instead.
     """
     self._board = Board()
     self._mycolor = None
     # Identity test on purpose: ``== None`` would dispatch to whatever
     # ``__eq__`` the model object happens to define.
     self._strong_model = s_base_model if s_model is None else s_model
     self._fast_model = f_base_model if f_model is None else f_model
Beispiel #2
0
    def rollout(self):
        """Play one model-guided move from this node, then recurse.

        The prediction is masked with the legal moves of ``gnugo_board``; the
        best remaining cell is played when it is a legal move, otherwise a
        random legal move is played.  The new position is appended to
        ``self.children`` and the rollout continues from it until the game is
        over.

        Returns:
            On a finished game: 0 for "1/2-1/2", 1 for "0-1", -1 otherwise.
            On an unfinished game: ``not child.rollout()``.
        """
        # make prediction for every slot
        prediction = node.get_play_stone(self.board)

        # check legal moves
        child_gnugo_board = deepcopy(self.gnugo_board)
        legal_moves = self.gnugo_board.legal_moves()

        # mark every legal move on a board-shaped mask, indexed [x, y]
        board_legal_moves = np.zeros((9, 9), dtype=bool)
        for move in legal_moves:
            x, y = name_to_coord(Board.flat_to_name(move))
            board_legal_moves[x, y] = 1

        # combine predictions with legal moves
        legal_predict = np.reshape(prediction[0], (9, 9)) * board_legal_moves

        # take best and push.  The mask above is indexed [x, y], so the
        # row-major flat index decodes as x = index // 9 and y = index % 9;
        # decoding it the other way round selects the transposed cell.
        index_best = np.argmax(legal_predict)
        x, y = divmod(int(index_best), 9)  # 9 should be replaced by board_size
        child_board = deepcopy(self.board)
        child_board[x, y][0] = 1

        tmp_move = Board.name_to_flat(Board.coord_to_name((x, y)))
        if tmp_move not in legal_moves:
            # NOTE(review): child_board still carries the stone of the
            # rejected prediction, not of the random move chosen here.
            tmp_move = np.random.choice(legal_moves)
        child_gnugo_board.push(tmp_move)
        child = node(self, child_board, child_gnugo_board, tmp_move)
        self.children.append(child)

        # continue rollouts while not game_over
        if not child_gnugo_board.is_game_over():
            # NOTE(review): ``not`` turns the child's 1 / -1 / 0 result into a
            # bool (only 0 becomes True); confirm this is the intended
            # perspective flip before relying on the value.
            return not child.rollout()

        result = child_gnugo_board.result()
        if result == "1/2-1/2":
            return 0
        return 1 if result == "0-1" else -1  # black wins if True
Beispiel #3
0
    def explore_node(self):
        """Create one child node for every legal move of this node.

        Every child gets its own copy of the local board and of the gnugo
        board, each with exactly that one move applied.  ``nb_child_visit``
        is incremented once per child created.
        """
        legal_moves = self.gnugo_board.legal_moves()

        for move in legal_moves:
            x, y = name_to_coord(Board.flat_to_name(move))

            # make move on a private copy of the local board
            child_board = deepcopy(self.board)
            child_board[x, y] = 1

            # Copy the gnugo board per child: pushing every move onto one
            # shared board would stack all siblings' moves on top of each
            # other and hand the same object to every child.
            child_gnugo_board = deepcopy(self.gnugo_board)
            child_gnugo_board.push(move)

            child = node(self, child_board, child_gnugo_board, move)
            self.children.append(child)
            self.nb_child_visit += 1
Beispiel #4
0
 def __init__(self):
     """Start with no colour assigned and a freshly created board."""
     self._mycolor = None
     self._board = Board()
 def __init__(self):
     """Initialise the game state and the search bookkeeping."""
     # Game state.
     self.mycolor = None
     self.board = Board()
     # Search bookkeeping: result cache, time budget and start timestamp.
     self.transposition_table = dict()
     self.start_time = 0
     self.max_time = 7.4
class myPlayer(PlayerInterface):
    """Go player that picks moves with iterative-deepening alpha-beta minimax.

    Search results are cached in a transposition table that is cleared at the
    start of every deepening pass.
    """

    # Per-point bonus used by evaluate(), indexed by flat coordinate and
    # written here as 9 rows of 9: the outer ring scores 0, inner points
    # score 1 or 2.
    _POSITION_SCORE = (
        0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 2, 2, 2, 1, 2, 2, 2, 0,
        0, 2, 2, 2, 1, 2, 2, 2, 0,
        0, 2, 2, 1, 1, 1, 2, 2, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 0,
        0, 2, 2, 1, 1, 1, 2, 2, 0,
        0, 2, 2, 2, 1, 2, 2, 2, 0,
        0, 2, 2, 2, 1, 2, 2, 2, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0,
    )

    def __init__(self):
        self.board = Board()
        self.mycolor = None
        # (value, action) results of the current deepening pass.
        self.transposition_table = {}
        # Budget per move, compared against differences of time().
        self.max_time = 7.4
        self.start_time = 0

    def getPlayerName(self):
        return "Team 38"

    def getPlayerMove(self):
        """Choose a move, play it on our board and return its name."""
        if self.board.is_game_over():
            return "PASS"
        move = self.choose_action()
        self.board.push(move)
        return Board.flat_to_name(move)

    def playOpponentMove(self, move):
        """Apply the opponent's move, given by name, to our board."""
        self.board.push(Board.name_to_flat(move))

    def newGame(self, color):
        self.mycolor = color
        self.opponent = Board.flip(color)

    def endGame(self, winner):
        if self.mycolor == winner:
            print("I won :D")
        else:
            print("I lost :(")

    def choose_action(self):
        """Run minimax at depth 1, 2, ... until the time budget is spent.

        Returns:
            The action of the deepest pass that finished within ``max_time``,
            or -1 when not even the first pass finished in time.
        """
        depth = 1
        self.start_time = time()
        (eval_score, selected_action) = (-1, -1)
        while True:
            tmp_time = time()
            self.transposition_table = {}
            new_score, new_action = self.minimax(depth, True, float('-inf'),
                                                 float('+inf'))
            # A pass that ran out of time is built on truncated subtrees, so
            # its result is only adopted when it finished within the budget.
            if time() - self.start_time < self.max_time:
                (eval_score, selected_action) = (new_score, new_action)
            print("MINIMAX AB ID(%d) : eval=%f, action=%d, time=%s" %
                  (depth, eval_score, selected_action, time() - tmp_time))
            if time() - self.start_time >= self.max_time:
                break
            depth += 1
        return selected_action

    def _table_key(self, depth):
        """Return the transposition-table key of the current position.

        The remaining depth is part of the key so that a shallow (or purely
        static) evaluation of a position is never returned for a query that
        asked for a deeper search of the same position.
        """
        return (str(self.board._currentHash), depth)

    def minimax(self, depth, is_max_turn, alpha, beta):
        """Alpha-beta search from the current board position.

        Args:
            depth: Remaining plies to search; 0 evaluates statically.
            is_max_turn: True when the side to move maximises the score.
            alpha: Best value the maximiser is already guaranteed.
            beta: Best value the minimiser is already guaranteed.

        Returns:
            ``(value, action)``.  ``action`` is None for a static
            evaluation and -1 when no move improved on the initial bound.
        """
        cached = self.transposition_table.get(self._table_key(depth))
        if cached is not None:
            return cached
        if depth == 0 or (time() - self.start_time >=
                          self.max_time) or self.board.is_game_over():
            result = (self.evaluate(), None)
            self.transposition_table[self._table_key(depth)] = result
            return result
        key_of_actions = list(self.board.generate_legal_moves())
        shuffle(key_of_actions)  # randomness
        best_value = float('-inf') if is_max_turn else float('+inf')
        best_action = -1
        action_targets = []
        for action_key in key_of_actions:
            self.board.push(action_key)
            eval_child, _ = self.minimax(depth - 1, not is_max_turn, alpha,
                                         beta)
            self.board.pop()
            if is_max_turn and best_value < eval_child:
                best_value = eval_child
                action_targets.clear()
                action_targets.append(action_key)
                alpha = max(alpha, best_value)
                if beta <= alpha:
                    break
            elif (not is_max_turn) and best_value > eval_child:
                best_value = eval_child
                action_targets.clear()
                action_targets.append(action_key)
                beta = min(beta, best_value)
                if beta <= alpha:
                    break
            elif best_value == eval_child:
                action_targets.append(action_key)
        if action_targets:
            best_action = choice(action_targets)  # randomness
        # NOTE(review): after a cutoff ``best_value`` is only a bound, yet it
        # is cached like an exact value.
        self.transposition_table[self._table_key(depth)] = (best_value,
                                                             best_action)
        return (best_value, best_action)

    def evaluate(self):
        """Score the current position from ``mycolor``'s point of view.

        Combines stone-count difference (weight 3), liberties of each
        stone's string (weight 1) and the position table (weight 10).  Each
        of the three terms is multiplied by its own ``normal(1, 0.1)`` draw.
        """
        position_score = self._POSITION_SCORE
        to_move = self.board.next_player()

        # All three partial scores are first computed for the side that is
        # NOT to move, then flipped below if that side is the opponent.
        if to_move == Board._BLACK:
            score_pieces = (self.board._nbWHITE - self.board._nbBLACK) * 3
        else:
            score_pieces = (self.board._nbBLACK - self.board._nbWHITE) * 3

        score_liberties = 0
        score_positions = 0
        for fcoord in range(len(self.board)):
            stone = self.board[fcoord]
            if stone == Board._EMPTY:
                continue
            sign = -1 if stone == to_move else 1
            string = self.board._getStringOfStone(fcoord)
            score_liberties += sign * self.board._stringLiberties[string]
            score_positions += sign * position_score[fcoord] * 10

        if to_move == self.mycolor:
            score_pieces *= -1
            score_liberties *= -1
            score_positions *= -1

        return score_pieces * normal(1, 0.1) + score_positions * normal(
            1, 0.1) + score_liberties * normal(1, 0.1)
 def playOpponentMove(self, move):
     """Convert the opponent's move name to a flat move and push it."""
     flat_move = Board.name_to_flat(move)
     self.board.push(flat_move)
 def getPlayerMove(self):
     """Choose our next move, push it on the board and return its name.

     Returns "PASS" without searching when the game is already over.
     """
     if not self.board.is_game_over():
         chosen = self.choose_action()
         self.board.push(chosen)
         return Board.flat_to_name(chosen)
     return "PASS"
Beispiel #9
0
class myPlayer(PlayerInterface):
    """MCTS Go player guided by a strong (tree) and a fast (rollout) model."""

    def __init__(self, s_model=None, f_model=None):
        """Create the player.

        Args:
            s_model: Model used in select_child(); defaults to the
                module-level ``s_base_model``.
            f_model: Model used in simulate_random_game(); defaults to the
                module-level ``f_base_model``.
        """
        self._board = Board()
        self._mycolor = None
        self._strong_model = s_base_model if s_model is None else s_model
        self._fast_model = f_base_model if f_model is None else f_model

    def getPlayerName(self):
        return "RL player"

    def encode(self, board):
        """Stack the move history of ``board`` into a (9, 9, 11) array.

        Planes, in order: black moves, white moves, player-to-move flag
        (all ones when an even number of moves was played), then eight
        history planes.  History plane ``8 - i`` marks the i-th most recent
        move (i = 1..8), so plane 7 is the last move played.

        NOTE(review): stones are taken from the raw move list, so captured
        stones stay marked, and how ``name_to_coord`` maps a pass is not
        visible here.  Confirm both against the training encoding.
        """
        history = board._historyMoveNames
        nb_moves = len(history)
        black_stones = np.zeros((9, 9), dtype=np.float32)
        white_stones = np.zeros((9, 9), dtype=np.float32)
        memo = [np.zeros((9, 9), dtype=int) for _ in range(8)]
        # Model convention: ones when an even number of moves was played.
        if nb_moves % 2 == 0:
            player_turn = np.ones((9, 9), dtype=np.float32)
        else:
            player_turn = np.zeros((9, 9), dtype=np.float32)

        # Moves alternate starting with black (odd indices are white).
        for index, name in enumerate(history):
            coord = name_to_coord(name)
            if index % 2:
                white_stones[coord[0], coord[1]] = 1
            else:
                black_stones[coord[0], coord[1]] = 1

        # ``memo`` is a list of planes: pick the plane first, then the cell.
        # Starting at 1 keeps 8 - i inside 0..7 and makes history[-i] count
        # from the end (history[-0] would be the first move).
        for i in range(1, 9):
            if nb_moves >= i:
                coord = name_to_coord(history[-i])
                memo[8 - i][coord[0], coord[1]] = 1

        return np.dstack([black_stones, white_stones, player_turn] + memo)

    def getPlayerMove(self):
        """Choose a move, play it on our board and return its name."""
        if self._board.is_game_over():
            return "PASS"
        move = self.select_move(self._board)
        self._board.play_move(move)
        return Board.flat_to_name(move)

    def playOpponentMove(self, move):
        self._board.play_move(Board.name_to_flat(move))

    def newGame(self, color):
        self._mycolor = color
        self._opponent = Board.flip(color)

    def endGame(self, winner):
        if self._mycolor == winner:
            print("I won :D")
        else:
            print("I lost :(")

    def select_move(self, board_org, max_time=7.4, temperature=1.2):
        """Run MCTS from ``board_org`` for ``max_time`` seconds.

        Args:
            board_org: Position to search from; it is deep-copied for every
                round and never modified.
            max_time: Time budget in seconds.
            temperature: Weight of the exploration term in select_child().

        Returns:
            The move of the root child with the highest winning fraction,
            or -1 when the root has no children.
        """
        start_time = time.time()
        root = MCTSNode(board_org.weak_legal_moves())
        rounds = 0
        while True:
            board = copy.deepcopy(board_org)
            node = root
            # Selection: descend while the node is fully expanded.
            while (not node.can_add_child()) and (not board.is_game_over()):
                node = self.select_child(node, board, temperature)
            # Expansion.
            if node.can_add_child() and not board.is_game_over():
                node = node.add_random_child(board)
            # Simulation, then backpropagation up to the root.
            winner = self.simulate_random_game(board)
            while node is not None:
                node.record_win(winner)
                node = node.parent
            if time.time() - start_time >= max_time:
                print()
                break
            rounds += 1
            print("Rounds %d (%f)" % (rounds, time.time() - start_time),
                  end='\r')
        # debug
        scored_moves = [(child.winning_frac(board_org.next_player()),
                         child.move, child.num_rollouts)
                        for child in root.children]
        scored_moves.sort(key=lambda x: x[0], reverse=True)
        for s, m, n in scored_moves[:5]:
            print('%s - %.3f (%d)' % (m, s, n))
        # pick best node
        best_move = -1
        best_pct = -1.0
        for child in root.children:
            child_pct = child.winning_frac(board_org.next_player())
            if child_pct > best_pct:
                best_pct = child_pct
                best_move = child.move
        print('Select move %s with win pct %.3f' % (best_move, best_pct))
        return best_move

    def select_child(self, node, board, temperature):
        """Pick the child with the best UCT score and play its move.

        The exploitation term is the mean of the strong model's prediction
        for the child's move and the child's winning fraction.
        """
        total_rollouts = sum(child.num_rollouts for child in node.children)
        log_rollouts = math.log(total_rollouts)
        best_score = -1
        best_child = None
        data_prepared = np.array(
            [prepare_datas(board, all_rotations=False)[0][0]], dtype=int)
        # TODO: verify the input layout expected by the strong model.
        policy_prediction = self._strong_model.predict(data_prepared)
        for child in node.children:
            # TODO: check that child.move indexes the prediction at the
            # intended board point.
            win_percentage = (policy_prediction[0][child.move] +
                              child.winning_frac(board.next_player())) / 2.0
            exploration_factor = math.sqrt(log_rollouts / child.num_rollouts)
            uct_score = win_percentage + temperature * exploration_factor
            if uct_score > best_score:
                best_score = uct_score
                best_child = child
        board.play_move(best_child.move)
        return best_child

    def simulate_random_game(self, board):
        """Play ``board`` out with the fast model and report the winner.

        The model's top move is played when it is among the weak legal
        moves, otherwise a random one.  The playout is capped at 1500 moves.

        Returns:
            "1-0" when white has more stones, "0-1" when black has more,
            "1/2-1/2" otherwise.
        """
        nb_iter = 0
        # The cap guards against playouts that never reach game over.
        while not board.is_game_over() and nb_iter < 1500:
            moves = board.weak_legal_moves()
            move_probabilities = self._fast_model.predict(
                np.array([prepare_datas(board, all_rotations=False)[0][0]],
                         dtype=int))
            max_move = np.argmax(move_probabilities)
            if max_move in moves:
                board.play_move(max_move)
            else:
                board.play_move(random.choice(moves))
            nb_iter += 1
        if board._nbWHITE > board._nbBLACK:
            return "1-0"
        elif board._nbWHITE < board._nbBLACK:
            return "0-1"
        else:
            return "1/2-1/2"
Beispiel #10
0
 def newGame(self, color):
     """Remember our colour and the flipped colour as the opponent's."""
     self._mycolor = color
     opponent_color = Board.flip(color)
     self._opponent = opponent_color
Beispiel #11
0
 def __init__(self):
     """Create a fresh board and load the saved 'model_priors' model."""
     self._mycolor = None
     self._board = Board()
     loader = tensorflow.keras.models.load_model
     self._model_priors = loader('model_priors')
Beispiel #12
0
class myPlayer(PlayerInterface):
    """MCTS Go player that scores leaves with the 'model_priors' network."""

    def __init__(self):
        self._board = Board()
        self._mycolor = None
        self._model_priors = tensorflow.keras.models.load_model('model_priors')

    def getPlayerName(self):
        return "Paul & Hugo"

    def getPlayerMove(self):
        """Choose a move, play it on our board and return its name."""
        if not self._board.is_game_over():
            chosen = self.select_move(self._board)
            self._board.play_move(chosen)
            return Board.flat_to_name(chosen)
        return "PASS"

    def playOpponentMove(self, move):
        """Play the opponent's move, given by name, on our board."""
        self._board.play_move(Board.name_to_flat(move))

    def newGame(self, color):
        self._mycolor = color
        self._opponent = Board.flip(color)

    def endGame(self, winner):
        outcome = "I won :D" if self._mycolor == winner else "I lost :("
        print(outcome)

    def select_move(self, board_org, max_time=7.4, temperature=1.2):
        """Grow a search tree for ``max_time`` seconds and pick a move.

        Each round selects a leaf, expands it if possible, builds feature
        maps from the leaf's move list and backs the network prediction (or
        0 when the move list is reported invalid) up to the root.

        Returns:
            The move of the root child with the highest win rate, or -1
            when the root has no children.
        """
        start_time = time.time()

        # The root knows the legal moves and the side to move.
        root = MCTSNode(board_org.weak_legal_moves(), board_org._nextPlayer)

        while True:
            board = copy.deepcopy(board_org)
            node = root

            # Exploration: walk down while the node is fully expanded.
            while not (node.can_add_child() or board.is_game_over()):
                node = self.select_child(node, board, temperature)

            # Expansion: add one random child when allowed.
            if node.can_add_child() and not board.is_game_over():
                node = node.add_random_child(board)

            # Empty batch that the sample is appended to before predicting.
            size = board._BOARDSIZE
            to_predict = np.empty((0, 15, size, size), dtype='int8')
            valid, sample_features_maps = db.build_history_from_moves(
                node.list_of_moves, board._BOARDSIZE)

            if valid:
                to_predict = np.append(to_predict,
                                       sample_features_maps,
                                       axis=0)
                # Known issue noted by the authors: after loading the saved
                # model, predict() fails on CPU because Conv2D only supports
                # NHWC there while the input is NCHW; transposing to NHWC
                # is rejected as well (the model expects 15 on axis -3).
                # They suspect a save/load problem, since the model works
                # in CNN_priors.ipynb right after training.
                score = self._model_priors.predict(to_predict)
            else:
                # An invalid board counts as a loss.
                score = 0

            # Backpropagation: update every node on the path to the root.
            while node is not None:
                node.update_winrate(self._mycolor, score)
                node = node.parent

            # time over
            if time.time() - start_time >= max_time:
                break

        # Exploitation: keep the root child with the best win rate.
        best_move, best_ratio = -1, -1.0
        for candidate in root.children:
            ratio = candidate.winrate(board_org.next_player())
            if ratio > best_ratio:
                best_ratio, best_move = ratio, candidate.move
        print('Select move %s with win pct %.3f' % (best_move, best_ratio))
        return best_move

    def select_child(self, node, board, temperature):
        """Return the child with the highest UCT score and play its move.

        The score is the child's win rate for the side to move plus
        ``temperature`` times sqrt(log(total rollouts) / child rollouts).
        """
        rollouts_of_children = sum(c.num_rollouts for c in node.children)
        log_rollouts = math.log(rollouts_of_children)

        top_score, top_child = -1, None
        for candidate in node.children:
            exploit = candidate.winrate(board.next_player())
            explore = math.sqrt(log_rollouts / candidate.num_rollouts)
            score = exploit + temperature * explore
            if score > top_score:
                top_score, top_child = score, candidate

        board.play_move(top_child.move)
        return top_child
Beispiel #13
0
def prepare_datas(
    board,
    care_about_win=False,
    all_rotations=True,
    all_moves=False
):
    """Turn the move history of ``board`` into network samples.

    Args:
        board: Game whose ``_historyMoveNames`` is replayed.
        care_about_win: When True, the goal cell is set to -1 instead of 1
            if the side to move is not the one ``board.result()`` reports
            as winner.  A draw is treated like a win.
        all_rotations: When True, build training samples: every chosen
            position is paired with the next move as goal and emitted in
            its 8 rotations/reflections.  When False, only the final
            position is encoded, with no goal.
        all_moves: With ``all_rotations``, use every position of the game
            instead of a random subset favouring the middle of the game.

    Returns:
        A list of ``[features, goal]`` pairs (``all_rotations=True``) or of
        ``[features]`` singletons.  ``features`` is a (9, 9, 11) array of
        black stones, white stones, side-to-move flag and eight history
        planes; ``goal`` is a flat array of 81 entries.  The history planes
        are currently left all-zero.  The list is empty when training
        samples are requested from a game with fewer than two moves.
    """
    datas = []
    nb_uses = random.randint(MIN_INFO_FROM_ONE_GAME, MAX_INFO_FROM_ONE_GAME)
    moves = board._historyMoveNames
    length = len(moves)

    result = board.result()
    if result == "1-0":
        winner = 1
    elif result == "0-1":
        winner = 0
    else:
        winner = 2

    if not all_rotations:
        # Only the last one, because we want a prediction on it.
        chosen_moves = [length - 1]
    elif length < 2:
        # No (position, next move) pair exists to learn from.
        return datas
    elif all_moves:
        # Take every move (the learning part, where the whole game is
        # retraced).  The last move has no successor, hence length - 1.
        chosen_moves = list(range(length - 1))
    else:
        # Gaussian weights centred on mid-game so that middle-game
        # positions are more likely to be picked.
        espe = length / 2.
        ecart = length / 4.
        positions = np.arange(1, length)
        weights = np.exp(-((positions - espe) ** 2) / (2. * ecart ** 2))
        # Probabilities must sum to 1 and be passed by keyword: the third
        # positional parameter of np.random.choice is ``replace``.
        proba = weights / weights.sum()
        chosen_moves = np.random.choice(length - 1, nb_uses, p=proba)

    for i in chosen_moves:  # stop after move i and predict the next one
        black = np.zeros((9, 9), dtype=int)
        white = np.zeros((9, 9), dtype=int)
        memo = [np.zeros((9, 9), dtype=int) for _ in range(8)]

        # Side to move once move i has been played.
        if (i + 1) % 2 == 0:
            # The whites, because the blacks have just played.
            current = np.ones((9, 9), dtype=int)
        else:
            # The blacks, the first move, (0+1)%2 == 1.
            current = np.zeros((9, 9), dtype=int)

        if all_rotations:
            goal_move = board.name_to_coord(moves[i + 1])
            goal = np.zeros((9, 9), dtype=int)
            if care_about_win and ((winner == 1 and current[0][0] != 1) or
                                   (winner == 0 and current[0][0] != 0)):
                goal[goal_move[0]][goal_move[1]] = -1
            else:
                # A draw is treated as a win (we did not lose, after all).
                goal[goal_move[0]][goal_move[1]] = 1

        # Replay the game up to move i on a fresh board so that captures
        # are reflected in the stone planes.
        b = Board()
        for j in range(i + 1):
            b.push(Board.name_to_flat(moves[j]))
        for x in range(9):
            for y in range(9):
                p = b._board[Board.flatten((x, Board._BOARDSIZE - y - 1))]
                if p == Board._WHITE:
                    white[x][y] = 1
                elif p == Board._BLACK:
                    black[x][y] = 1

        curr_data = np.dstack([black, white, current] + memo)

        if not all_rotations:
            datas.append([curr_data])
            continue

        # Four rotations of the position, then four of its up/down mirror.
        for mirrored in (False, True):
            if mirrored:
                curr_data = np.flipud(curr_data)
                goal = np.flipud(goal)
            for k in range(4):
                datas.append([
                    np.rot90(curr_data, k=k, axes=(0, 1)),
                    np.reshape(np.rot90(goal, k=k, axes=(0, 1)), 81)
                ])
    return datas
Beispiel #14
0
 def getPlayerMove(self):
     """Select our next move, play it on the board and return its name.

     Returns "PASS" without searching when the game is already over.
     """
     if not self._board.is_game_over():
         chosen = self.select_move(self._board)
         self._board.play_move(chosen)
         return Board.flat_to_name(chosen)
     return "PASS"
Beispiel #15
0
 def update_winrate(self, player, score):
     """Credit ``score`` to ``player`` and ``1 - score`` to the other side.

     Also counts one more rollout for this node.
     """
     self.total_scores[player] += score
     other_side = Board.flip(player)
     self.total_scores[other_side] += 1 - score
     self.num_rollouts += 1
Beispiel #16
0
 def playOpponentMove(self, move):
     """Convert the opponent's move name to a flat move and play it."""
     flat_move = Board.name_to_flat(move)
     self._board.play_move(flat_move)
Beispiel #17
0
    def backpropagate(self, node, value):
        """Add ``value`` to ``node`` and to each of its ancestors.

        For every node on the way up, the visit count grows by one, the
        value is accumulated and the mean is refreshed and printed.  The
        walk stops at the first node whose ``parent`` is falsy.
        """
        current = node
        while True:
            current.visits += 1
            current.total_value += value
            current.value = current.total_value / current.visits
            print(f"After backprop, node value {current.value}")
            if not current.parent:
                return
            current = current.parent

    def play(self):
        """Run ``select`` on the best child of the tree's node."""
        best = self.node.best_child()
        return self.select(best)


# Demo script: build a board, wrap it in a root node and a tree, then run
# one selection step, announcing each stage on stdout.
print(f"\nCréation de la board :\n")
from rollout import Rollout, copy_board
from Goban import Board
board = Board()

# Shape of the legal-move list (original note: 81 possible moves + pass).
print(np.array(board.generate_legal_moves()).shape) #81 move possible + pass

print(f"Création du noeud :\n")
# Original note: the second argument should become CNN.predict(board)
# instead of the constant 0.
base_node = MCTSNode(board, 0) # prior to CNN.predict(board)) instead of 0

print(f"Création du MCTS Tree :\n")
mcts_tree= MCTSTree(base_node)

# First step: expand the base node.  Per the original note, expansion is
# done inside select(), so expand() is not called separately.
#mcts_tree.expand()
mcts_tree.select() #expand done in select

# Visual separator before the node listing that follows.
print("\n\n!!!!!!!!!!!!!!!!!!!\n\n")
for node in mcts_tree.nodeList:
Beispiel #18
0
class myPlayer(PlayerInterface):
    """Go player using Monte Carlo tree search with random playouts."""

    def __init__(self):
        self._board = Board()
        self._mycolor = None

    def getPlayerName(self):
        return "Team 38"

    def getPlayerMove(self):
        """Choose a move, play it on our board and return its name."""
        if self._board.is_game_over():
            return "PASS"
        move = self.select_move(self._board)
        self._board.play_move(move)
        return Board.flat_to_name(move)

    def playOpponentMove(self, move):
        self._board.play_move(Board.name_to_flat(move))

    def newGame(self, color):
        self._mycolor = color
        self._opponent = Board.flip(color)

    def endGame(self, winner):
        if self._mycolor == winner:
            print("I won :D")
        else:
            print("I lost :(")

    @staticmethod
    def select_move(board_org, max_time=7, temperature=1.33):
        """Run MCTS from ``board_org`` for ``max_time`` seconds.

        Args:
            board_org: Position to search from; it is deep-copied for every
                round and never modified.
            max_time: Time budget in seconds.
            temperature: Weight of the exploration term in select_child().

        Returns:
            -1 (pass) right away when the opponent has just passed and the
            side to move has more stones; otherwise the move of the root
            child with the highest winning fraction, or -1 when the root
            has no children.
        """
        start_time = time.time()
        # exploit the chance when the other player passes and he's losing
        if (board_org._lastPlayerHasPassed) and (
            ((board_org.next_player() == board_org._WHITE) and
             (board_org._nbWHITE > board_org._nbBLACK)) or
            ((board_org.next_player() == board_org._BLACK) and
             (board_org._nbWHITE < board_org._nbBLACK))):
            return -1
        root = MCTSNode(board_org.weak_legal_moves())
        rounds = 0
        while True:
            board = copy.deepcopy(board_org)
            node = root
            # Selection: descend while the node is fully expanded.
            while (not node.can_add_child()) and (not board.is_game_over()):
                node = myPlayer.select_child(node, board, temperature)
            # Expansion.
            if node.can_add_child() and not board.is_game_over():
                node = node.add_random_child(board)
            # Simulation, then backpropagation up to the root.
            winner = myPlayer.simulate_random_game(board)
            while node is not None:
                node.record_win(winner)
                node = node.parent
            rounds += 1
            print("Rounds %d (%f)" % (rounds, time.time() - start_time),
                  end='\r')
            if time.time() - start_time >= max_time:
                print()
                break
        # debug
        scored_moves = [(child.winning_frac(board_org), child.move,
                         child.num_rollouts) for child in root.children]
        scored_moves.sort(key=lambda x: x[0], reverse=True)
        for s, m, n in scored_moves[:5]:
            print('%s - %.2f (%d)' % (m, s, n))
        # pick best node
        best_move = -1
        best_pct = -1.0
        for child in root.children:
            child_pct = child.winning_frac(board_org)
            if child_pct > best_pct:
                best_pct = child_pct
                best_move = child.move
        print('Select move %s with win pct %.3f' % (best_move, best_pct))
        return best_move

    @staticmethod
    def select_child(node, board, temperature):
        """Pick the child with the best UCT score and play its move.

        When no child scores above -1 (for instance, no children at all),
        ``node`` itself is returned and a pass (-1) is played on ``board``.
        """
        # The small offset keeps log() defined when there are no rollouts.
        total_rollouts = sum(child.num_rollouts
                             for child in node.children) + 0.001
        log_rollouts = math.log(total_rollouts)

        best_score = -1
        best_child = None
        best_move = -1
        for child in node.children:
            win_percentage = child.winning_frac(board)
            exploration_factor = math.sqrt(log_rollouts / child.num_rollouts)
            uct_score = win_percentage + temperature * exploration_factor
            if uct_score > best_score:
                best_score = uct_score
                best_child = child
                best_move = child.move
        if best_child is None:
            best_child = node
            best_move = -1
        board.play_move(best_move)
        return best_child

    @staticmethod
    def simulate_random_game(board):
        """Play ``board`` out with random moves and report the winner.

        The mover never fills a point whose on-board neighbours are all its
        own stones, and only passes when it has more stones.  The playout
        stops after 101 turns at the latest.

        Returns:
            "1-0" when white has more stones, "0-1" when black has more,
            "1/2-1/2" otherwise.
        """
        def is_point_an_eye(board, coord):
            # True when every on-board neighbour of ``coord`` is a stone of
            # the side to move.  A pass (-1) is not a board point.
            if coord == -1:
                return False
            mover = board.next_player()
            # Neighbours of a point are stored consecutively in
            # board._neighbors, terminated by -1.
            i = board._neighborsEntries[coord]
            while board._neighbors[i] != -1:
                if board._board[board._neighbors[i]] != mover:
                    return False
                i += 1
            # Every on-board neighbour is friendly; off-board neighbours of
            # edge and corner points count as controlled.
            return True

        def is_pass_valid(board, coord):
            # We can only pass if we are winning
            if coord != -1:
                return True
            if board.next_player() == board._BLACK:
                return board._nbWHITE < board._nbBLACK
            else:
                return board._nbWHITE > board._nbBLACK

        turns = 0
        while not board.is_game_over():
            turns += 1
            # exploit the chance when the other player passes and he's losing
            if (board._lastPlayerHasPassed) and (is_pass_valid(board, -1)):
                board.play_move(-1)
                continue
            moves = board.weak_legal_moves()
            random.shuffle(moves)
            # A flag instead of a -1 sentinel: a pass accepted in the loop
            # below must not be followed by a second pass for the other side.
            played = False
            for move in moves:
                if (is_pass_valid(board, move)) and (not is_point_an_eye(
                        board, move)) and (board.play_move(move)):
                    played = True
                    break
            if not played:
                board.play_move(-1)
            if turns > 100:
                break
        if board._nbWHITE > board._nbBLACK:
            return "1-0"
        elif board._nbWHITE < board._nbBLACK:
            return "0-1"
        else:
            return "1/2-1/2"
Beispiel #19
0
class myPlayer(PlayerInterface):
    def __init__(self):
        """Create a player with a fresh board and no colours assigned yet."""
        self._board = Board()
        # Both colour attributes are filled in by newGame(); defining them
        # here keeps them present on every instance, even before a game has
        # been started.
        self._mycolor = None
        self._opponent = None

    def getPlayerName(self):
        """Return the display name of this player."""
        name = "Team 38"
        return name

    def getPlayerMove(self):
        if self._board.is_game_over():
            return "PASS"
        move = self.select_move(self._board)
        self._board.play_move(move)
        return Board.flat_to_name(move)

    def playOpponentMove(self, move):
        """Apply the opponent's move, given by its name, to the internal board."""
        flat_move = Board.name_to_flat(move)
        self._board.play_move(flat_move)

    def newGame(self, color):
        """Store the colour assigned to this player and the opposing colour."""
        self._mycolor = color
        # The opposing colour is derived from the one just stored.
        other = Board.flip(self._mycolor)
        self._opponent = other

    def endGame(self, winner):
        if self._mycolor == winner:
            print("I won :D")
        else:
            print("I lost :(")

    @staticmethod
    def select_move(board_org, max_time=7.4, temperature=1.2):
        """Search from ``board_org`` with Monte Carlo tree search and pick a move.

        Rounds of selection, expansion, random playout and result
        propagation are repeated on deep copies of ``board_org``. The elapsed
        time is only compared with ``max_time`` at the end of a round, so at
        least one round always runs and the budget can be overrun by the
        duration of one round.

        Args:
            board_org: position to search from; this function never plays a
                move on it directly.
            max_time: time budget in seconds.
            temperature: exploration weight forwarded to ``select_child``.

        Returns:
            The ``move`` of the root child with the highest winning fraction
            for the player to move on ``board_org``, or ``-1`` when the root
            has no children.
        """
        started = time.time()
        root = MCTSNode(board_org.weak_legal_moves())
        rounds = 0
        while True:
            board = copy.deepcopy(board_org)
            node = root
            # Selection: walk down while the current node is fully expanded
            # and the copied game is still running.
            while not (node.can_add_child() or board.is_game_over()):
                node = myPlayer.select_child(node, board, temperature)
            # Expansion: grow the tree by one node when possible.
            if node.can_add_child() and not board.is_game_over():
                node = node.add_random_child(board)
            # Simulation: finish the copied game with random moves.
            winner = myPlayer.simulate_random_game(board)
            # Propagation: report the result to every node up to the root.
            while node is not None:
                node.record_win(winner)
                node = node.parent
            if time.time() - started >= max_time:
                print()
                break
            rounds += 1
            print("Rounds %d (%f)" % (rounds, time.time() - started),
                  end='\r')

        # Debug output: the five root children with the best winning fraction.
        ranking = sorted(
            ((child.winning_frac(board_org.next_player()),
              child.move,
              child.num_rollouts) for child in root.children),
            key=lambda entry: entry[0],
            reverse=True)
        for frac, move, rollouts in ranking[:5]:
            print('%s - %.3f (%d)' % (move, frac, rollouts))

        # Final choice: the first child with the strictly highest fraction.
        best_move, best_pct = -1, -1.0
        for child in root.children:
            pct = child.winning_frac(board_org.next_player())
            if not pct > best_pct:
                continue
            best_pct, best_move = pct, child.move
        print('Select move %s with win pct %.3f' % (best_move, best_pct))
        return best_move

    @staticmethod
    def select_child(node, board, temperature):
        # upper confidence bound for trees (UCT) metric
        total_rollouts = sum(child.num_rollouts for child in node.children)
        log_rollouts = math.log(total_rollouts)

        best_score = -1
        best_child = None
        # loop over each child.
        for child in node.children:
            # calculate the UCT score.
            win_percentage = child.winning_frac(board.next_player())
            exploration_factor = math.sqrt(log_rollouts / child.num_rollouts)
            uct_score = win_percentage + temperature * exploration_factor
            # Check if this is the largest we've seen so far.
            if uct_score > best_score:
                best_score = uct_score
                best_child = child
        board.play_move(best_child.move)
        return best_child

    @staticmethod
    def simulate_random_game(board):
        def is_point_an_eye(board, coord):
            # We must control 3 out of 4 corners if the point is in the middle
            # of the board; on the edge we must control all corners.
            friendly_corners = 0
            i_org = i = board._neighborsEntries[coord]
            while board._neighbors[i] != -1:
                n = board._board[board._neighbors[i]]
                if n == board.next_player():
                    return False
                if (n != Board._EMPTY) or (n != board.next_player()):
                    friendly_corners += 1
                i += 1
            if i >= i_org + 4:
                # Point is in the middle.
                return friendly_corners >= 3
            # Point is on the edge or corner.
            return (4 - i_org - i) + friendly_corners == 4

        # ==============================
        while not board.is_game_over():
            moves = board.weak_legal_moves()
            random.shuffle(moves)
            valid_move = -1  # PASS
            for move in moves:
                if not (is_point_an_eye(board,
                                        move)) and (board.play_move(move)):
                    valid_move = move
                    break
            if valid_move == -1:
                board.play_move(-1)
        if (board._nbWHITE > board._nbBLACK):
            return "1-0"
        elif (board._nbWHITE < board._nbBLACK):
            return "0-1"
        else:
            return "1/2-1/2"