def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Evaluate an MCTS leaf.

    For a non-terminal leaf (done == 0), ask the network for a value and
    move priors, then expand the leaf by attaching one edge per allowed
    action, creating child nodes in the tree as needed. For a terminal
    leaf, the incoming game value is kept as-is.

    Returns the (value, breadcrumbs) pair used by the backup phase.
    """
    log = lg.logger_mcts
    log.info('------EVALUATING LEAF------')
    if done != 0:
        # Terminal state: nothing to expand, just report the game result.
        log.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value)
        return (value, breadcrumbs)
    value, probs, allowedActions = self.get_preds(leaf.state)
    log.info('PREDICTED VALUE FOR %d: %f', leaf.state.playerTurn, value)
    # Keep only the priors of the legal actions, aligned with allowedActions.
    probs = probs[allowedActions]
    for prior, action in zip(probs, allowedActions):
        childState, _, _ = leaf.state.takeAction(action)
        if childState.id in self.mcts.tree:
            child = self.mcts.tree[childState.id]
            log.info('existing node...%s...', child.id)
        else:
            child = mc.Node(childState)
            self.mcts.addNode(child)
            log.info('added node...%s...p = %f', child.id, prior)
        # Edge stores (inNode, outNode, prior, action); leaf keeps (action, edge).
        leaf.edges.append((action, mc.Edge(leaf, child, prior, action)))
    return (value, breadcrumbs)
def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Evaluate an MCTS leaf identified by its state id.

    For a non-terminal leaf, rebuild the GameState from the leaf's id,
    query the network for a value and per-action priors (allowedActions is
    a mask over all action indices), and attach one edge per legal action,
    creating child nodes in the tree as needed. Terminal leaves keep the
    incoming game value.

    Returns the (value, breadcrumbs) pair used by the backup phase.
    """
    log = lg.logger_mcts
    log.debug('------EVALUATING LEAF------')
    if done:
        # Terminal leaf: report the result for the player to move.
        log.debug(
            'GAME VALUE FOR %d: %f',
            GameState.current_player_from_id(leaf.state_id), value)
        return (value, breadcrumbs)
    state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
    value, probs, allowedActions = self.get_preds(state)
    log.debug('PREDICTED VALUE FOR %d: %f', state.currentPlayer, value)
    for action, legal in enumerate(allowedActions):
        if not legal:
            continue
        childState, _, _ = state.takeAction(action)
        if childState.id in self.mcts.tree:
            child = self.mcts.tree[childState.id]
            log.debug('existing node...%s...', child.state_id)
        else:
            child = mc.Node(childState)
            self.mcts.addNode(child)
            log.debug('added node...%s...p = %f', child.state_id, probs[action])
        # probs is indexed by absolute action index here (mask semantics).
        leaf.edges.append((action, mc.Edge(leaf, child, probs[action], action)))
    return (value, breadcrumbs)
def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Evaluate an MCTS leaf.

    Non-terminal leaves (done == 0) are expanded: the network supplies a
    value and priors, and one edge per allowed action is appended to the
    leaf, creating child nodes in the tree when unseen. Terminal leaves
    pass the incoming value through unchanged.

    Returns the (value, breadcrumbs) pair for the backup phase.
    """
    if done != 0:
        return (value, breadcrumbs)
    value, probs, allowedActions = self.get_preds(leaf.state)
    # Restrict the prior vector to the legal actions, aligned positionally.
    priors = probs[allowedActions]
    for prior, action in zip(priors, allowedActions):
        # Action indices are translated to concrete moves before applying.
        childState, _, _ = leaf.state.takeAction(index_to_move(action))
        if childState.id in self.mcts.tree:
            child = self.mcts.tree[childState.id]
        else:
            child = mc.Node(childState)
            self.mcts.addNode(child)
        leaf.edges.append((action, mc.Edge(leaf, child, prior, action)))
    return (value, breadcrumbs)
def evaluateLeaf(self, leaf, value, done, breadcrumbs): lg.logger_mcts.info('------EVALUATING LEAF------') #评估该叶子节点,如果不是终态,则进行预测 if done == 0: #经过预测得到的value值已经不是0或者-1了,因为对于非终态(目前棋盘上的棋子暂时无法判断胜负) # 的叶子节点返回的价值,无法决定整个棋盘的局势,需要通过神经网络来预测 value, probs, allowedActions = self.get_preds(leaf.state) lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f', leaf.state.playerTurn, value) # probs = probs[allowedActions] for idx, action in enumerate(allowedActions): # takeAction return (newState, value, done) newState, _, _ = leaf.state.takeAction(action) if newState.id not in self.mcts.tree: #获取节点的信息 #节点的id其实是用棋盘黑白棋的状态合并来表示的 #Node id:000000000000000000000000000000000001000000000000000000000000000000000010000000000000 #node本身是一个包含了众多信息的“类” node = mc.Node(newState) #添加到树中 self.mcts.addNode(node) lg.logger_mcts.info('added node...%s...p = %f', node.id, probs[idx]) else: #tree相当于一个数组,id是索引,node是值,用来存储当前的树 node = self.mcts.tree[newState.id] lg.logger_mcts.info('existing node...%s...', node.id) #Edge: inNode, outNode, prior, action #inNode是currentNode,outNode是采取行动后新到达的节点 #传入了 ['P'] 值 newEdge = mc.Edge(leaf, node, probs[idx], action) leaf.edges.append((action, newEdge)) else: #如果叶子节点即为终态,则直接返回价值0或者-1 lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value) #返回当前行棋序列下(棋盘)的价值 return ((value, breadcrumbs))
def evaluate_leaf(self, leaf, value, over, backtrack):
    """Evaluate an MCTS leaf node.

    If the game is not over, query the model for a value and move priors,
    then expand the leaf by attaching one edge per allowed move (creating
    child nodes in the tree as needed). If the game is over, the incoming
    `value` is returned unchanged.

    Args:
        leaf: tree node being evaluated; its `edges` list is extended in place.
        value: game value to fall back on when `over` is non-zero.
        over: 0 while the game is still in progress.
        backtrack: path taken to reach the leaf, returned untouched for backup.

    Returns:
        (value, backtrack) tuple used by the backup phase.
    """
    if over == 0:
        value, probabilities, allowed = self.get_predictions(leaf.state)
        # Keep only the prior of each legal move, positionally aligned with
        # `allowed` (comprehension replaces the former append loop).
        probs = [probabilities[move] for move in allowed]
        for idx, move in enumerate(allowed):
            new_state, _, _ = leaf.state.make_move(move)
            if new_state.id not in self.mcts.tree:
                node = mc.Node(new_state)
                self.mcts.add_node(node)
            else:
                node = self.mcts.tree[new_state.id]
            new_edge = mc.Edge(leaf, node, probs[idx], move)
            leaf.edges.append((move, new_edge))
    return (value, backtrack)