Example No. 1
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):

        lg.logger_mcts.debug('------EVALUATING LEAF------')

        if not done:

            state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
            value, probs, allowedActions = self.get_preds(state)
            lg.logger_mcts.debug('PREDICTED VALUE FOR %d: %f',
                                 state.currentPlayer, value)

            for idx, allowedAction in enumerate(allowedActions):
                if allowedAction:
                    newState, _, _ = state.takeAction(idx)
                    if newState.id not in self.mcts.tree:
                        node = mc.Node(newState)
                        self.mcts.addNode(node)
                        lg.logger_mcts.debug('added node...%s...p = %f',
                                             node.state_id, probs[idx])
                    else:
                        node = self.mcts.tree[newState.id]
                        lg.logger_mcts.debug('existing node...%s...',
                                             node.state_id)

                    newEdge = mc.Edge(leaf, node, probs[idx], idx)
                    leaf.edges.append((idx, newEdge))

        else:
            lg.logger_mcts.debug(
                'GAME VALUE FOR %d: %f',
                GameState.current_player_from_id(leaf.state_id), value)

        return value, breadcrumbs
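evaluateLeaf only depends on the shape of what get_preds returns: a scalar value estimate for the leaf, a probability per action, and a boolean mask of allowed actions. The stand-in below is a hypothetical, self-contained illustration of that contract (get_preds_stub and its arguments are not part of the repo); it returns a neutral value and a uniform prior over the legal cells, which is enough to drive the expansion loop above.

    import numpy as np

    def get_preds_stub(allowed_actions, n_actions):
        # Boolean legality mask, same role as allowedActions above.
        allowed = np.zeros(n_actions, dtype=bool)
        allowed[list(allowed_actions)] = True
        # Uniform prior over legal cells and a neutral value estimate;
        # the real agent gets both from its neural network instead.
        probs = allowed / allowed.sum()
        value = 0.0
        return value, probs, allowed

    # Example: a 6x7 board where only the bottom-row cells 35..41 are playable.
    value, probs, allowed = get_preds_stub(range(35, 42), 6 * 7)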
Example No. 2
    def backFill(self, leaf, value, breadcrumbs):
        lg.logger_mcts.debug('------DOING BACKFILL------')

        currentPlayer = GameState.current_player_from_id(leaf.state_id)

        for edge in breadcrumbs:
            playerTurn = GameState.current_player_from_id(edge.inNode.state_id)
            if playerTurn == currentPlayer:
                direction = 1
            else:
                direction = -1

            edge.stats['N'] += 1
            edge.stats['W'] += value * direction
            edge.stats['Q'] = edge.stats['W'] / edge.stats['N']

            lg.logger_mcts.debug(
                'updating edge with value %f for player %d... N = %d, W = %f, Q = %f',
                value * direction, playerTurn, edge.stats['N'],
                edge.stats['W'], edge.stats['Q'])

            if lg.logger_mcts.isEnabledFor(logging.DEBUG):
                lg.logger_mcts.debug(
                    GameState.from_id(edge.outNode.state_id,
                                      config.GRID_SHAPE).render())
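backFill assumes each mc.Edge carries a stats dictionary with visit count 'N', total value 'W' and mean value 'Q' (plus the prior 'P' used during selection), and that the leaf value is added with a sign flip on the opponent's edges. A hypothetical, self-contained rendering of that same update, with invented names:

    def backfill_stats(stats, value, same_player):
        # +1 when the edge belongs to the player the leaf was evaluated for,
        # -1 for the opponent's edges, exactly like `direction` above.
        direction = 1 if same_player else -1
        stats['N'] += 1
        stats['W'] += value * direction
        stats['Q'] = stats['W'] / stats['N']
        return stats

    edge_stats = {'N': 0, 'W': 0.0, 'Q': 0.0, 'P': 0.25}
    backfill_stats(edge_stats, value=1.0, same_player=False)
    # -> {'N': 1, 'W': -1.0, 'Q': -1.0, 'P': 0.25}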
Example No. 3
    def test_id(self):
        """Je transforme les victoires de test_victory_true_4 en id"""
        """Puis je crée un GameState à partir de cet id"""
        """Et je teste si le nouveau GameState est égal à l'ancien"""

        for ligne in range(GRID_SHAPE[0] + 1 - NB_TOKENS_VICTORY,
                           GRID_SHAPE[0]):
            for column in range(0, GRID_SHAPE[1] + 1 - NB_TOKENS_VICTORY):
                # Align 4 PLAYER_1 tokens along the diagonal whose (bottom-left) starting point is (ligne, column)
                board = np.full(GRID_SHAPE, NONE, dtype=np.int8)
                for i, j in zip(range(ligne, ligne - NB_TOKENS_VICTORY, -1),
                                range(column, column + NB_TOKENS_VICTORY)):
                    board[i, j] = PLAYER_1
                    board[0:i, j] = PLAYER_2
                    board[i, 0:j] = PLAYER_2
                # Fill the rows below the diagonal with PLAYER_2
                board[0:ligne - (NB_TOKENS_VICTORY - 1), :] = PLAYER_2
                game_state = GameState(currentPlayer=PLAYER_1, board=board)

                state_id = game_state.id

                new_game_state = GameState.from_id(state_id, board.shape)

                # See https://stackoverflow.com/questions/3302949/best-way-to-assert-for-numpy-array-equality
                self.assertIsNone(
                    np.testing.assert_array_equal(new_game_state.board,
                                                  game_state.board))
                self.assertEqual(new_game_state.currentPlayer,
                                 game_state.currentPlayer)
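The test only requires GameState.id and GameState.from_id to be inverses of each other. One possible way such a reversible id could be built is sketched below; this encoding (a player byte followed by the raw int8 board bytes) is an assumption for illustration, not necessarily the one GameState actually uses.

    import numpy as np

    def to_id(current_player, board):
        # One signed byte for the player, then the flattened int8 board.
        return np.int8(current_player).tobytes() + board.astype(np.int8).tobytes()

    def from_id(state_id, shape):
        current_player = int(np.frombuffer(state_id[:1], dtype=np.int8)[0])
        board = np.frombuffer(state_id[1:], dtype=np.int8).reshape(shape)
        return current_player, board

    board = np.zeros((6, 7), dtype=np.int8)
    player, restored = from_id(to_id(1, board), board.shape)
    assert player == 1 and np.array_equal(restored, board)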
Example No. 4
    def simulate(self):

        if lg.logger_mcts.isEnabledFor(logging.DEBUG):
            state = GameState.from_id(self.mcts.root.state_id,
                                      config.GRID_SHAPE)
            lg.logger_mcts.debug('ROOT NODE...%s', self.mcts.root.state_id)
            lg.logger_mcts.debug(state.render())
            lg.logger_mcts.debug('CURRENT PLAYER...%d', state.currentPlayer)

        ##### MOVE THE LEAF NODE
        leaf, value, done, breadcrumbs = self.mcts.moveToLeaf()
        if lg.logger_mcts.isEnabledFor(logging.DEBUG):
            state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
            lg.logger_mcts.debug(state.render())

        ##### EVALUATE THE LEAF NODE
        value, breadcrumbs = self.evaluateLeaf(leaf, value, done, breadcrumbs)

        ##### BACKFILL THE VALUE THROUGH THE TREE
        self.mcts.backFill(leaf, value, breadcrumbs)
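simulate performs a single selection / evaluation / backfill pass. In use it is typically called many times from the same root, after which the move is read off the root's visit counts; the driver below is a hypothetical usage sketch (agent and config.MCTS_SIMS are assumptions, not shown in these excerpts).

    # Run a fixed budget of simulations from the current root ...
    for _ in range(config.MCTS_SIMS):
        agent.simulate()

    # ... then pick the action whose edge was visited most often.
    visit_counts = {action: edge.stats['N']
                    for action, edge in agent.mcts.root.edges}
    best_action = max(visit_counts, key=visit_counts.get)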
Example No. 5
    def moveToLeaf(self):

        lg.logger_mcts.debug('------MOVING TO LEAF------')

        breadcrumbs = []
        currentNode = self.root

        done = False
        value = 0

        while not currentNode.isLeaf():

            state = GameState.from_id(currentNode.state_id, config.GRID_SHAPE)

            lg.logger_mcts.debug('PLAYER TURN...%d', state.currentPlayer)

            maxQU = -99999

            if currentNode == self.root:
                epsilon = config.EPSILON
                nu = np.random.dirichlet([config.ALPHA] *
                                         len(currentNode.edges))
            else:
                epsilon = 0
                nu = [0] * len(currentNode.edges)

            Nb = 0
            for action, edge in currentNode.edges:
                Nb = Nb + edge.stats['N']

            for idx, (action, edge) in enumerate(currentNode.edges):

                U = (self.cpuct *
                     ((1 - epsilon) * edge.stats['P'] + epsilon * nu[idx]) *
                     np.sqrt(Nb) / (1 + edge.stats['N']))

                Q = edge.stats['Q']

                lg.logger_mcts.debug(
                    'action: %d (%d)... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f',
                    action, action % config.GRID_SHAPE[1], edge.stats['N'],
                    np.round(edge.stats['P'], 6), np.round(nu[idx], 6),
                    (1 - epsilon) * edge.stats['P'] + epsilon * nu[idx],
                    np.round(edge.stats['W'], 6), np.round(Q, 6),
                    np.round(U, 6), np.round(Q + U, 6))

                if Q + U > maxQU:
                    maxQU = Q + U
                    simulationAction = action
                    simulationEdge = edge

            lg.logger_mcts.debug('action with highest Q + U...%d',
                                 simulationAction)

            # takeAction returns the value of newState from the POV of the new playerTurn
            newState, value, done = state.takeAction(simulationAction)
            currentNode = simulationEdge.outNode
            breadcrumbs.append(simulationEdge)

        lg.logger_mcts.debug('DONE...%d', done)

        return currentNode, value, done, breadcrumbs
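The quantity maximised inside the selection loop is a PUCT score: the prior P (blended with Dirichlet noise nu at the root) is scaled by the square root of the parent's visit count and damped by the edge's own visits, then added to the mean value Q. A self-contained sketch with illustrative numbers, independent of the classes above:

    import numpy as np

    def puct_score(stats, nu, epsilon, cpuct, parent_visits):
        prior = (1 - epsilon) * stats['P'] + epsilon * nu
        u = cpuct * prior * np.sqrt(parent_visits) / (1 + stats['N'])
        return stats['Q'] + u

    edges = [
        {'N': 10, 'Q': 0.20, 'P': 0.50},  # well explored, decent value
        {'N': 1,  'Q': 0.00, 'P': 0.30},  # barely explored, big exploration bonus
    ]
    parent_visits = sum(e['N'] for e in edges)
    scores = [puct_score(e, nu=0.0, epsilon=0, cpuct=1.0, parent_visits=parent_visits)
              for e in edges]
    best = int(np.argmax(scores))  # -> 1, the under-explored edge wins here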