Esempio n. 1
0
def play():
    """Run an interactive game: a human clicks moves on an OpenCV window
    while MCTS (backed by the policy/value network) replies.

    NOTE(review): the inner ``while True`` never breaks, so the outer
    loop body is effectively entered only once; the ``next_node``
    returned by the search is discarded and a fresh root is built after
    every engine move — confirm both are intentional.
    """
    g = game.Game()
    net = policy_value_net.Net()
    mc = MCTS.MCTS(net=net)
    # Root node with no parent and no prior move.
    node = MCTS.Node(None, None)
    while True:
        img = g.get_cur_img()
        cv2.imshow('board_img', img)
        # Human moves are registered via mouse clicks on the window.
        cv2.setMouseCallback('board_img', g.bind_click)
        cv2.waitKey(33)
        while True:
            # A human move shrinks valid_states, so a decreasing length
            # signals "the human just played".
            before_len = len(g.board.valid_states)
            board_img = g.get_cur_img()
            cv2.imshow('board_img', board_img)
            cv2.waitKey(33)
            now_len = len(g.board.valid_states)
            if now_len < before_len:
                # Redraw the board with the human's stone before searching.
                board_img = g.get_cur_img()
                cv2.imshow('board_img', board_img)
                cv2.waitKey(33)
                # Let MCTS pick the engine's reply from the current board.
                action, next_node, _ = mc.search(g.board, node)
                x, y = g.board.location_to_move(action)
                print(action)
                print(x, y)
                g.board.do_move(action)
                # Start the next search from a fresh root (next_node is
                # not reused).
                node = MCTS.Node(None, None)
Esempio n. 2
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Evaluate a leaf node of the search tree.

        For a non-terminal leaf (``done == 0``) the network predicts a
        value and move priors, and one edge per allowed action is
        attached to the leaf (creating child nodes as needed).  For a
        terminal leaf the incoming game value is used unchanged.

        Returns the (possibly re-predicted) value together with the
        untouched backpropagation path.
        """
        lg.logger_mcts.info('------EVALUATING LEAF------')

        if done != 0:
            # Terminal position: the outcome is already decided.
            lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn,
                                value)
            return (value, breadcrumbs)

        # Non-terminal: ask the network for a value and move priors.
        value, probs, allowedActions = self.get_preds(leaf.state)
        lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f',
                            leaf.state.playerTurn, value)

        # Keep only the priors of the legal moves.
        probs = probs[allowedActions]

        for prior_idx, move in enumerate(allowedActions):
            childState, _, _ = leaf.state.takeAction(move)
            if childState.id in self.mcts.tree:
                child = self.mcts.tree[childState.id]
                lg.logger_mcts.info('existing node...%s...', child.id)
            else:
                child = mc.Node(childState)
                self.mcts.addNode(child)
                lg.logger_mcts.info('added node...%s...p = %f', child.id,
                                    probs[prior_idx])

            # Edge from the leaf to the child, carrying the prior.
            leaf.edges.append(
                (move, mc.Edge(leaf, child, probs[prior_idx], move)))

        return (value, breadcrumbs)
Esempio n. 3
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Evaluate a leaf node; expand it when the game is not over.

        Non-terminal leaves are rebuilt from their state id, scored by
        the network, and given one edge per legal move (child nodes are
        created or fetched from the tree).  Terminal leaves keep the
        value passed in.  Returns ``(value, breadcrumbs)``.
        """
        lg.logger_mcts.debug('------EVALUATING LEAF------')

        if done:
            # Terminal leaf: nothing to expand, just log the outcome.
            lg.logger_mcts.debug(
                'GAME VALUE FOR %d: %f',
                GameState.current_player_from_id(leaf.state_id), value)
            return (value, breadcrumbs)

        # Rebuild the full state from the compact id, then predict.
        state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
        value, probs, allowedActions = self.get_preds(state)
        lg.logger_mcts.debug('PREDICTED VALUE FOR %d: %f',
                             state.currentPlayer, value)

        for move, legal in enumerate(allowedActions):
            if not legal:
                continue
            childState, _, _ = state.takeAction(move)
            if childState.id in self.mcts.tree:
                child = self.mcts.tree[childState.id]
                lg.logger_mcts.debug('existing node...%s...',
                                     child.state_id)
            else:
                child = mc.Node(childState)
                self.mcts.addNode(child)
                lg.logger_mcts.debug('added node...%s...p = %f',
                                     child.state_id, probs[move])

            # probs is indexed by action index, so probs[move] is the prior.
            leaf.edges.append(
                (move, mc.Edge(leaf, child, probs[move], move)))

        return (value, breadcrumbs)
Esempio n. 4
0
    def test_init_mcts(self):
        """A fresh MCTS must expose the given root and index it by state id."""
        environment = Game()
        root_node = mc.Node(environment.gameState)
        tree = mc.MCTS(root_node, config.CPUCT)

        self.assertEqual(tree.root, root_node)
        self.assertEqual(
            tree.tree[environment.gameState._generate_id()], root_node)
Esempio n. 5
0
 def buildMCTS(self, state):
     """Build a brand-new MCTS tree with *state* as its root."""
     lg.logger_mcts.info(
         '****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
     # Build a new tree: the current position becomes the root node.
     # Used to create the root whenever no MCTS tree exists yet.
     self.root = mc.Node(state)
     # mc is the MCTS module; its search returns
     # (currentNode, value, done, breadcrumbs).
     self.mcts = mc.MCTS(self.root, self.cpuct)
Esempio n. 6
0
def Policy_Player_MCTS(game):
    """Choose the next move for *game* via 1000 MCTS explorations.

    A copy of the game is wrapped in a search root, explored with the
    (module-level) policy, and the move of the child selected at low
    temperature is returned.
    """
    root = MCTS.Node(copy(game))
    for _ in range(1000):
        root.explore(policy)

    chosen, (v, nn_v, p, nn_p) = root.next(temperature=0.1)
    return chosen.game.last_move
Esempio n. 7
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Expand a non-terminal leaf with one edge per allowed action.

        Returns ``(value, breadcrumbs)``; for terminal leaves the
        incoming value is passed through untouched.
        """
        if done == 0:
            value, probs, allowedActions = self.get_preds(leaf.state)
            # Keep only the priors of the legal moves.
            probs = probs[allowedActions]

            for i, act in enumerate(allowedActions):
                child_state, _, _ = leaf.state.takeAction(index_to_move(act))
                if child_state.id in self.mcts.tree:
                    child = self.mcts.tree[child_state.id]
                else:
                    child = mc.Node(child_state)
                    self.mcts.addNode(child)

                leaf.edges.append(
                    (act, mc.Edge(leaf, child, probs[i], act)))

        return (value, breadcrumbs)
Esempio n. 8
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Evaluate *leaf*; if it is not terminal, expand it with edges.

        Parameters:
            leaf: the node reached by the current tree descent.
            value: game value handed in by the caller, used as-is for
                terminal leaves.
            done: 0 while the game is not finished at this leaf.
            breadcrumbs: backpropagation path, returned unchanged.
        """
        lg.logger_mcts.info('------EVALUATING LEAF------')
        # Evaluate this leaf node; if it is not terminal, run a prediction.
        if done == 0:
            # The predicted value is no longer just 0 or -1: for a
            # non-terminal leaf (the stones currently on the board cannot
            # yet decide the game) the outcome cannot be read off the
            # position, so the neural network must estimate it.
            value, probs, allowedActions = self.get_preds(leaf.state)
            lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f',
                                leaf.state.playerTurn, value)
            # Keep only the priors of the legal moves.
            probs = probs[allowedActions]

            for idx, action in enumerate(allowedActions):
                # takeAction returns (newState, value, done)
                newState, _, _ = leaf.state.takeAction(action)
                if newState.id not in self.mcts.tree:
                    # Build the node for this unseen state.
                    # A node's id is the merged black/white board state, e.g.
                    # 000000000000000000000000000000000001000000000000000000000000000000000010000000000000
                    # The node itself is a class bundling the search stats.
                    node = mc.Node(newState)
                    # Register the new node in the tree.
                    self.mcts.addNode(node)
                    lg.logger_mcts.info('added node...%s...p = %f', node.id,
                                        probs[idx])
                else:
                    # tree acts like a map: id is the key, node the value,
                    # holding the current search tree.
                    node = self.mcts.tree[newState.id]
                    lg.logger_mcts.info('existing node...%s...', node.id)
                # Edge(inNode, outNode, prior, action): inNode is the
                # current node, outNode the node reached after the action;
                # probs[idx] is stored as the edge's prior ['P'].
                newEdge = mc.Edge(leaf, node, probs[idx], action)

                leaf.edges.append((action, newEdge))

        else:
            # Terminal leaf: return the game value (0 or -1) directly.
            lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn,
                                value)
        # Return the value of the board under the current move sequence.
        return ((value, breadcrumbs))
    def evaluate_leaf(self, leaf, value, over, backtrack):
        """Evaluate *leaf*; if the game is not over, expand it.

        For a non-terminal leaf (``over == 0``) the network predicts a
        value and move probabilities, and one edge per allowed move is
        attached to the leaf (creating child nodes as needed).  For a
        terminal leaf the incoming value is returned unchanged.

        Returns:
            (value, backtrack): the leaf value and the untouched
            backpropagation path.
        """
        if over == 0:
            value, probabilities, allowed = self.get_predictions(leaf.state)

            # Keep only the probabilities of the allowed moves,
            # aligned positionally with `allowed`.
            probs = [probabilities[move] for move in allowed]

            for idx, move in enumerate(allowed):
                new_state, _, _ = leaf.state.make_move(move)
                if new_state.id not in self.mcts.tree:
                    node = mc.Node(new_state)
                    self.mcts.add_node(node)
                else:
                    node = self.mcts.tree[new_state.id]

                new_edge = mc.Edge(leaf, node, probs[idx], move)
                leaf.edges.append((move, new_edge))

        return (value, backtrack)
Esempio n. 10
0
 def buildMCTS(self, state):
     """Create a brand-new search tree whose root wraps *state*."""
     lg.logger_mcts.info(
         '****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
     root_node = mc.Node(state)
     self.root = root_node
     self.mcts = mc.MCTS(root_node, self.cpuct)
Esempio n. 11
0
 def buildMCTS(self, state):
     """Reset the agent's MCTS: *state* becomes the root of a new tree."""
     new_root = mc.Node(state)
     self.root = new_root
     self.mcts = mc.MCTS(new_root, self.cpuct)
Esempio n. 12
0
 def build_MCTS(self, state):
     """Start a fresh search tree rooted at *state*."""
     fresh_root = mc.Node(state)
     self.root = fresh_root
     # NOTE(review): `cpu_count` looks like the exploration constant
     # (cpuct), not a CPU count — confirm the attribute name upstream.
     self.mcts = mc.MCTS(fresh_root, self.cpu_count)
Esempio n. 13
0
# try a higher number of episodes if training underfits
episodes = 2000  # number of self-play training episodes

import progressbar as pb
# Console progress bar spanning the whole training run.
widget = ['training loop: ', pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA()]
timer = pb.ProgressBar(widgets=widget, maxval=episodes).start()

outcomes = []  # filled during the training loop below
policy_loss = []  # filled during the training loop below

Nmax = 1000  # cap on MCTS explorations per move
for e in range(episodes):

    mytree = MCTS.Node(game)
    logterm = []
    vterm = []

    while mytree.outcome is None:
        for _ in range(Nmax):
            mytree.explore(policy)
            if mytree.N >= Nmax:
                break

        current_player = mytree.game.player
        mytree, (v, nn_v, p, nn_p) = mytree.next()
        mytree.detach_mother()

        loglist = torch.log(nn_p) * p
        constant = torch.where(p > 0, p * torch.log(p), torch.tensor(0.))
from collections import deque
import MCTS

episodes = 400
outcomes = []
losses = []

import progressbar as pb

widget = ['training loop: ', pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA()]
timer = pb.ProgressBar(widgets=widget, maxval=episodes).start()

for e in range(episodes):

    mytree = MCTS.Node(ConnectN(**game_setting))
    vterm = []
    logterm = []

    while mytree.outcome is None:
        for _ in range(50):
            mytree.explore(policy)

        current_player = mytree.game.player
        mytree, (v, nn_v, p, nn_p) = mytree.next()
        mytree.detach_mother()

        # solution
        # compute prob* log pi
        loglist = torch.log(nn_p) * p