def play():
    """Interactive game loop: show the board with OpenCV, accept human moves
    via mouse clicks, and answer each human move with an MCTS-selected move.

    Runs forever; relies on `game`, `policy_value_net`, `MCTS`, and `cv2`
    being imported at module level.
    """
    g = game.Game()
    net = policy_value_net.Net()
    # NOTE(review): local name `mc` shadows the common `import MCTS as mc`
    # convention used elsewhere in this project — here it is the search object.
    mc = MCTS.MCTS(net=net)
    node = MCTS.Node(None, None)
    while True:
        img = g.get_cur_img()
        cv2.imshow('board_img', img)
        # Route clicks on the board window to the game's click handler.
        cv2.setMouseCallback('board_img', g.bind_click)
        cv2.waitKey(33)  # ~30 fps UI pump; also lets the callback fire
        while True:
            before_len = len(g.board.valid_states)
            board_img = g.get_cur_img()
            cv2.imshow('board_img', board_img)
            cv2.waitKey(33)
            now_len = len(g.board.valid_states)
            # Fewer valid states than before means the human just placed a
            # stone (assumes valid_states shrinks on each move — TODO confirm).
            if now_len < before_len:
                board_img = g.get_cur_img()
                cv2.imshow('board_img', board_img)
                cv2.waitKey(33)
                # Let MCTS pick the engine's reply from the current position.
                action, next_node, _ = mc.search(g.board, node)
                x, y = g.board.location_to_move(action)
                print(action)
                print(x, y)
                g.board.do_move(action)
                # Fresh root each turn; the previous subtree is not reused.
                node = MCTS.Node(None, None)
def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Expand a leaf node of the MCTS tree.

    If the game is not over (``done == 0``), ask the network for a value and
    move priors, then attach one edge per allowed action (creating child
    nodes as needed). If the game is over, keep the terminal ``value``.

    Returns a ``(value, breadcrumbs)`` tuple for backpropagation.
    """
    lg.logger_mcts.info('------EVALUATING LEAF------')
    if done == 0:
        # Non-terminal: replace the incoming value with the net's prediction.
        value, probs, allowedActions = self.get_preds(leaf.state)
        lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f', leaf.state.playerTurn, value)
        # Keep only the priors of legal actions; after this, position idx in
        # `probs` lines up with allowedActions[idx].
        probs = probs[allowedActions]
        for idx, action in enumerate(allowedActions):
            # takeAction returns (newState, value, done); only the state is needed.
            newState, _, _ = leaf.state.takeAction(action)
            if newState.id not in self.mcts.tree:
                # Unseen position: create and register a new node.
                node = mc.Node(newState)
                self.mcts.addNode(node)
                lg.logger_mcts.info('added node...%s...p = %f', node.id, probs[idx])
            else:
                # Transposition: reuse the existing node for this position.
                node = self.mcts.tree[newState.id]
                lg.logger_mcts.info('existing node...%s...', node.id)
            # Edge(inNode, outNode, prior, action) links leaf to the child.
            newEdge = mc.Edge(leaf, node, probs[idx], action)
            leaf.edges.append((action, newEdge))
    else:
        # Terminal leaf: value is the game result as passed in.
        lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value)
    return ((value, breadcrumbs))
def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Expand a leaf node, reconstructing the game state from its state id.

    For non-terminal leaves the network supplies a value plus a full prior
    vector; ``allowedActions`` here acts as a per-index legality mask
    (truthy entries mark legal action indices). Returns ``(value,
    breadcrumbs)`` for backpropagation.
    """
    lg.logger_mcts.debug('------EVALUATING LEAF------')
    if not done:
        # Nodes store only ids; rebuild the full state to query the network.
        state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
        value, probs, allowedActions = self.get_preds(state)
        lg.logger_mcts.debug('PREDICTED VALUE FOR %d: %f', state.currentPlayer, value)
        for idx, allowedAction in enumerate(allowedActions):
            # allowedActions is a mask: skip indices that are not legal moves.
            if allowedAction:
                newState, _, _ = state.takeAction(idx)
                if newState.id not in self.mcts.tree:
                    node = mc.Node(newState)
                    self.mcts.addNode(node)
                    # probs is the full-length vector, so indexing by the
                    # action index idx is correct here.
                    lg.logger_mcts.debug('added node...%s...p = %f', node.state_id, probs[idx])
                else:
                    # Transposition: reuse the node already in the tree.
                    node = self.mcts.tree[newState.id]
                    lg.logger_mcts.debug('existing node...%s...', node.state_id)
                newEdge = mc.Edge(leaf, node, probs[idx], idx)
                leaf.edges.append((idx, newEdge))
    else:
        lg.logger_mcts.debug(
            'GAME VALUE FOR %d: %f',
            GameState.current_player_from_id(leaf.state_id), value)
    return ((value, breadcrumbs))
def test_init_mcts(self):
    """A freshly built MCTS exposes its root and indexes it by state id."""
    environment = Game()
    root_node = mc.Node(environment.gameState)
    search_tree = mc.MCTS(root_node, config.CPUCT)
    self.assertEqual(search_tree.root, root_node)
    self.assertEqual(
        search_tree.tree[environment.gameState._generate_id()], root_node)
def buildMCTS(self, state): lg.logger_mcts.info( '****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name) #建立一颗新树,传入当前节点的信息到MCTS树中的根节点。当mcts不存在时,通过此方法建立root self.root = mc.Node(state) #import MCTS as mc MCTS:return currentNode, value, done, breadcrumbs self.mcts = mc.MCTS(self.root, self.cpuct)
def Policy_Player_MCTS(game):
    """Pick a move for *game* by running 1000 MCTS explorations guided by
    the module-level `policy`, then following the low-temperature choice."""
    root = MCTS.Node(copy(game))
    for _ in range(1000):
        root.explore(policy)
    # next() yields the chosen child plus (value, net value, probs, net probs).
    chosen_child, (v, nn_v, p, nn_p) = root.next(temperature=0.1)
    return chosen_child.game.last_move
def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Expand a non-terminal leaf: attach one edge per allowed action,
    creating or reusing child nodes, and return (value, breadcrumbs)."""
    if done == 0:
        value, probs, allowedActions = self.get_preds(leaf.state)
        # Restrict priors to the legal actions; the filtered vector lines up
        # positionally with allowedActions, so iterate them together.
        probs = probs[allowedActions]
        for action, prior in zip(allowedActions, probs):
            newState, _, _ = leaf.state.takeAction(index_to_move(action))
            if newState.id not in self.mcts.tree:
                child = mc.Node(newState)
                self.mcts.addNode(child)
            else:
                # Transposition: this position already has a node.
                child = self.mcts.tree[newState.id]
            edge = mc.Edge(leaf, child, prior, action)
            leaf.edges.append((action, edge))
    return ((value, breadcrumbs))
def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Evaluate (and, if non-terminal, expand) a leaf of the MCTS tree.

    For a non-terminal leaf the network predicts a value and move priors and
    one edge per allowed action is attached to the leaf, creating child nodes
    for unseen positions. For a terminal leaf the incoming game ``value``
    (e.g. 0 or -1) is kept as-is.

    Returns a ``(value, breadcrumbs)`` tuple for backpropagation.
    """
    lg.logger_mcts.info('------EVALUATING LEAF------')
    # Evaluate this leaf; only predict when it is not a terminal state.
    if done == 0:
        # For a non-terminal leaf the board alone cannot decide the game,
        # so the predicted value comes from the neural network rather than
        # the raw game result.
        value, probs, allowedActions = self.get_preds(leaf.state)
        lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f',
                            leaf.state.playerTurn, value)
        # FIX: this filtering line had been commented out while `probs[idx]`
        # below still indexed by enumerate position, which mis-assigned
        # priors (probs is the full-length vector, but idx counts only the
        # allowed actions). Restoring the filter re-aligns probs[idx] with
        # allowedActions[idx], matching the sibling implementation.
        probs = probs[allowedActions]
        for idx, action in enumerate(allowedActions):
            # takeAction returns (newState, value, done).
            newState, _, _ = leaf.state.takeAction(action)
            if newState.id not in self.mcts.tree:
                # A node id encodes the board as the concatenated black/white
                # stone bitstrings, e.g. '000...0001000...0'; Node bundles the
                # full per-position bookkeeping.
                node = mc.Node(newState)
                # Register the new node in the tree.
                self.mcts.addNode(node)
                lg.logger_mcts.info('added node...%s...p = %f', node.id, probs[idx])
            else:
                # tree maps id -> node; reuse the existing node (transposition).
                node = self.mcts.tree[newState.id]
                lg.logger_mcts.info('existing node...%s...', node.id)
            # Edge(inNode, outNode, prior, action): inNode is the current
            # leaf, outNode the position reached by taking `action`; the
            # prior becomes the edge's 'P' statistic.
            newEdge = mc.Edge(leaf, node, probs[idx], action)
            leaf.edges.append((action, newEdge))
    else:
        # Terminal leaf: return the game's own value directly.
        lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value)
    # Value of the board under the current line of play.
    return ((value, breadcrumbs))
def evaluate_leaf(self, leaf, value, over, backtrack):
    """Expand a non-terminal leaf with one edge per allowed move and return
    (value, backtrack) for backpropagation."""
    if over == 0:
        value, probabilities, allowed = self.get_predictions(leaf.state)
        for move in allowed:
            # Prior for this move, taken straight from the full distribution.
            prior = probabilities[move]
            new_state, _, _ = leaf.state.make_move(move)
            if new_state.id not in self.mcts.tree:
                child = mc.Node(new_state)
                self.mcts.add_node(child)
            else:
                # Position already in the tree — reuse its node.
                child = self.mcts.tree[new_state.id]
            leaf.edges.append((move, mc.Edge(leaf, child, prior, move)))
    return (value, backtrack)
def buildMCTS(self, state):
    """Discard any existing tree and build a new one rooted at *state*."""
    lg.logger_mcts.info(
        '****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
    new_root = mc.Node(state)
    self.root = new_root
    self.mcts = mc.MCTS(new_root, self.cpuct)
def buildMCTS(self, state):
    """Start a fresh MCTS search tree rooted at *state*."""
    fresh_root = mc.Node(state)
    self.root = fresh_root
    self.mcts = mc.MCTS(fresh_root, self.cpuct)
def build_MCTS(self, state):
    """Build a new MCTS tree for this agent, rooted at *state*."""
    self.root = mc.Node(state)
    # NOTE(review): `self.cpu_count` is passed where sibling implementations
    # pass the c_puct exploration constant — confirm the attribute name is
    # intentional and not a transcription of `cpuct`.
    self.mcts = mc.MCTS(self.root, self.cpu_count)
# try a higher number episodes = 2000 import progressbar as pb widget = ['training loop: ', pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA()] timer = pb.ProgressBar(widgets=widget, maxval=episodes).start() outcomes = [] policy_loss = [] Nmax = 1000 for e in range(episodes): mytree = MCTS.Node(game) logterm = [] vterm = [] while mytree.outcome is None: for _ in range(Nmax): mytree.explore(policy) if mytree.N >= Nmax: break current_player = mytree.game.player mytree, (v, nn_v, p, nn_p) = mytree.next() mytree.detach_mother() loglist = torch.log(nn_p) * p constant = torch.where(p > 0, p * torch.log(p), torch.tensor(0.))
# Self-play training loop for ConnectN: 400 episodes, 50 MCTS explorations
# per move. Relies on module-level `ConnectN`, `game_setting`, `policy`,
# and `torch`.
# NOTE(review): the loop body appears truncated here (the loss computation
# continues beyond this chunk).
from collections import deque
import MCTS
episodes = 400
outcomes = []   # game results collected per episode
losses = []     # per-episode training losses
import progressbar as pb
widget = ['training loop: ', pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA()]
timer = pb.ProgressBar(widgets=widget, maxval=episodes).start()
for e in range(episodes):
    # Fresh game and fresh search tree each episode.
    mytree = MCTS.Node(ConnectN(**game_setting))
    vterm = []
    logterm = []
    # Play one full game: keep moving until the game has an outcome.
    while mytree.outcome is None:
        for _ in range(50):
            mytree.explore(policy)
        current_player = mytree.game.player
        # Advance to the chosen child; next() also returns
        # (value, net value, visit probs, net probs).
        mytree, (v, nn_v, p, nn_p) = mytree.next()
        # Drop the parent reference so the old tree can be garbage-collected.
        mytree.detach_mother()
        # solution
        # Cross-entropy term: p * log(pi_theta).
        loglist = torch.log(nn_p) * p