def backFill(self, leaf, value, breadcrumbs):
    """Propagate a leaf evaluation back along the traversed edges.

    For every edge in ``breadcrumbs`` the visit count ``N`` is incremented,
    ``value`` is added to the accumulated total ``W`` and the mean ``Q`` is
    recomputed as ``W / N``. The value is added as-is when the player to
    move at the edge's ``inNode`` is the same as the player to move at
    ``leaf``, and negated otherwise.

    Args:
        leaf: Node whose ``state_id`` identifies the evaluated position.
        value: Evaluation of ``leaf`` from the point of view of the player
            to move at ``leaf``.
        breadcrumbs: Iterable of edges to update; each must expose
            ``inNode``, ``outNode`` and a ``stats`` mapping with the keys
            ``'N'``, ``'W'`` and ``'Q'``.
    """
    mcts_log = lg.logger_mcts
    mcts_log.debug('------DOING BACKFILL------')

    leaf_player = GameState.current_player_from_id(leaf.state_id)

    for edge in breadcrumbs:
        edge_player = GameState.current_player_from_id(edge.inNode.state_id)

        # Same side as the leaf keeps the sign, the opponent sees it flipped.
        sign = 1 if edge_player == leaf_player else -1
        signed_value = value * sign

        stats = edge.stats
        stats['N'] = stats['N'] + 1
        stats['W'] = stats['W'] + signed_value
        stats['Q'] = stats['W'] / stats['N']

        mcts_log.debug(
            'updating edge with value %f for player %d... N = %d, W = %f, Q = %f',
            signed_value,
            edge_player,
            stats['N'],
            stats['W'],
            stats['Q'],
        )

        # Rebuilding and rendering the state is only worth doing when the
        # rendered board will actually be emitted.
        if mcts_log.isEnabledFor(logging.DEBUG):
            out_state = GameState.from_id(edge.outNode.state_id, config.GRID_SHAPE)
            mcts_log.debug(out_state.render())
def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Evaluate a leaf node and, if the game is not over, expand it.

    When ``done`` is truthy the incoming ``value`` is only logged and
    returned unchanged. Otherwise the state is rebuilt from
    ``leaf.state_id``, ``self.get_preds`` supplies a replacement ``value``
    together with ``probs`` and ``allowedActions``, and one edge is appended
    to ``leaf.edges`` for every index whose ``allowedActions`` entry is
    truthy. Child nodes are reused from ``self.mcts.tree`` when their state
    id is already present and created (and registered via
    ``self.mcts.addNode``) otherwise.

    Args:
        leaf: Node to evaluate; must expose ``state_id`` and ``edges``.
        value: Game value to report when ``done`` is truthy.
        done: Truthy when the position at ``leaf`` is terminal.
        breadcrumbs: Passed through untouched to the caller.

    Returns:
        A ``(value, breadcrumbs)`` tuple.
    """
    mcts_log = lg.logger_mcts
    mcts_log.debug('------EVALUATING LEAF------')

    # Terminal position: nothing to expand, just report the game value.
    if done:
        mcts_log.debug(
            'GAME VALUE FOR %d: %f',
            GameState.current_player_from_id(leaf.state_id),
            value,
        )
        return value, breadcrumbs

    state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
    value, probs, allowedActions = self.get_preds(state)
    mcts_log.debug('PREDICTED VALUE FOR %d: %f', state.currentPlayer, value)

    for action, is_allowed in enumerate(allowedActions):
        if not is_allowed:
            continue

        next_state, _, _ = state.takeAction(action)

        if next_state.id in self.mcts.tree:
            child = self.mcts.tree[next_state.id]
            mcts_log.debug('existing node...%s...', child.state_id)
        else:
            child = mc.Node(next_state)
            self.mcts.addNode(child)
            mcts_log.debug('added node...%s...p = %f', child.state_id, probs[action])

        leaf.edges.append((action, mc.Edge(leaf, child, probs[action], action)))

    return value, breadcrumbs