Ejemplo n.º 1
0
    def backFill(self, leaf, value, breadcrumbs):
        """Propagate ``value`` back along the traversed edges.

        For every edge in ``breadcrumbs`` the visit count ``N`` is incremented,
        ``value`` is added to the accumulated value ``W`` and the mean ``Q`` is
        recomputed as ``W / N``.  The value is added with a positive sign when
        the player to move at the edge's ``inNode`` is the same as the player
        to move at ``leaf``, and with a negative sign otherwise.

        Args:
            leaf: Node whose ``state_id`` identifies the reference player.
            value: Value to back up, expressed from the leaf player's side.
            breadcrumbs: Iterable of edges to update; each edge exposes
                ``inNode``, ``outNode`` and a ``stats`` mapping with the keys
                ``'N'``, ``'W'`` and ``'Q'``.
        """
        log = lg.logger_mcts
        log.debug('------DOING BACKFILL------')

        leaf_player = GameState.current_player_from_id(leaf.state_id)

        for edge in breadcrumbs:
            mover = GameState.current_player_from_id(edge.inNode.state_id)
            # Same player as at the leaf -> credit the value, else debit it.
            sign = 1 if mover == leaf_player else -1
            signed_value = value * sign

            stats = edge.stats
            stats['N'] = stats['N'] + 1
            stats['W'] = stats['W'] + signed_value
            stats['Q'] = stats['W'] / stats['N']

            log.debug(
                'updating edge with value %f for player %d... N = %d, W = %f, Q = %f',
                signed_value, mover, stats['N'], stats['W'], stats['Q'])

            # Rebuilding and rendering the state is only worth doing when the
            # result will actually be logged.
            if log.isEnabledFor(logging.DEBUG):
                out_state = GameState.from_id(edge.outNode.state_id,
                                              config.GRID_SHAPE)
                log.debug(out_state.render())
Ejemplo n.º 2
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Evaluate ``leaf`` and, unless the game is over, expand it.

        When ``done`` is truthy the supplied ``value`` is only logged and
        returned unchanged.  Otherwise the state is rebuilt from
        ``leaf.state_id``, ``self.get_preds`` supplies a replacement value,
        per-action probabilities and an allowed-action mask, and one edge per
        allowed action is appended to ``leaf.edges``.  Successor nodes are
        taken from ``self.mcts.tree`` when already present and added to it
        otherwise.

        Args:
            leaf: Node to evaluate; its ``edges`` list is extended in place.
            value: Game value used as-is when ``done`` is truthy.
            done: Truthy when the leaf should not be expanded.
            breadcrumbs: Passed through to the caller unchanged.

        Returns:
            A ``(value, breadcrumbs)`` tuple.
        """
        log = lg.logger_mcts
        log.debug('------EVALUATING LEAF------')

        if done:
            log.debug(
                'GAME VALUE FOR %d: %f',
                GameState.current_player_from_id(leaf.state_id), value)
            return value, breadcrumbs

        state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
        value, probs, allowedActions = self.get_preds(state)
        log.debug('PREDICTED VALUE FOR %d: %f', state.currentPlayer, value)

        for action, is_allowed in enumerate(allowedActions):
            if not is_allowed:
                continue

            successor, _, _ = state.takeAction(action)
            successor_id = successor.id

            if successor_id in self.mcts.tree:
                child = self.mcts.tree[successor_id]
                log.debug('existing node...%s...', child.state_id)
            else:
                child = mc.Node(successor)
                self.mcts.addNode(child)
                log.debug('added node...%s...p = %f',
                          child.state_id, probs[action])

            leaf.edges.append((action, mc.Edge(leaf, child, probs[action], action)))

        return value, breadcrumbs