Ejemplo n.º 1
0
def test_mcts_convergence(counter_game, counter_game_nn):
    """Check that repeated expansion separates the two root edge values.

    A root node is built from the ``counter_game`` / ``counter_game_nn``
    fixtures and expanded 500 times; afterwards the ``Q`` of the first edge
    must exceed the ``Q`` of the second edge by more than 0.5.
    """
    start = counter_game()
    root_node = mcts.Node(start, np.array([0.3, 0.5]), counter_game_nn())

    expansions = 500
    for _ in range(expansions):
        root_node.expand()

    first_edge, second_edge = root_node.edges[0], root_node.edges[1]
    assert first_edge.Q - second_edge.Q > 0.5
Ejemplo n.º 2
0
 def human_play(self, move):
     """Advance the search tree by the human player's ``move``.

     If one of the current root's children was reached by ``move`` (judged by
     the ``last_move`` of its board), that child becomes the new root.
     Otherwise a new board and node are built for the resulting state, using
     the network prediction for that state, and installed as the root.

     Returns:
         Whatever ``get_a_winner()`` reports for the new root's board.
     """
     root = self.mctree.tree
     board = root.get_board()
     children = root.get_children()
     known_moves = [child.get_board().last_move for child in children]

     if move not in known_moves:
         # The move has no child under the current root: start a new root.
         next_state = board.get_next_state_by_move(move)
         new_board = board_wuzi.Board(state=next_state,
                                      last_move=move,
                                      **self.kwargs)
         p, v = self.nn.predict(next_state)
         self.mctree.tree = mcts.Node(new_board, p, v)
         return new_board.get_a_winner()

     # Reuse the first child whose last move equals the requested move.
     chosen = children[known_moves.index(move)]
     self.mctree.tree = chosen
     return chosen.board.get_a_winner()
Ejemplo n.º 3
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Expand ``leaf`` unless the game is finished, and return its value.

        When ``done == 0`` the incoming ``value`` is replaced by the one from
        ``self.get_preds(leaf.state)`` and one edge per allowed action is
        appended to ``leaf.edges``; the child node for each action is looked
        up in ``self.mcts.tree`` by state id, or created and added if absent.
        For any other ``done`` the incoming ``value`` is passed through.

        Returns:
            The tuple ``(value, breadcrumbs)``.
        """
        lg.logger_mcts.info('------EVALUATING LEAF------')

        if done != 0:
            # Finished game: nothing to expand, just report the given value.
            lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn,
                                value)
            return (value, breadcrumbs)

        value, probs, allowedActions = self.get_preds(leaf.state)
        lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f',
                            leaf.state.playerTurn, value)

        # Restrict the probabilities to the allowed actions, so position i in
        # ``probs`` now belongs to ``allowedActions[i]``.
        probs = probs[allowedActions]

        for idx, action in enumerate(allowedActions):
            newState, _, _ = leaf.state.takeAction(action)
            prior = probs[idx]

            if newState.id in self.mcts.tree:
                node = self.mcts.tree[newState.id]
                lg.logger_mcts.info('existing node...%s...', node.id)
            else:
                node = mcts.Node(newState)
                self.mcts.addNode(node)
                lg.logger_mcts.info('added node...%s...p = %f', node.id,
                                    prior)

            leaf.edges.append((action, mcts.Edge(leaf, node, prior, action)))

        return (value, breadcrumbs)
Ejemplo n.º 4
0
def test_get_leaf_states(counter_game, counter_game_nn):
    """Check that a root expanded twice yields at least one leaf state."""
    start = counter_game()
    root_node = mcts.Node(start, np.array([0.3, 0.5]), counter_game_nn())

    expansions = 2
    for _ in range(expansions):
        root_node.expand()

    # Materialise the result so its length can be checked.
    leaf_states = list(root_node.get_leaf_states())
    assert len(leaf_states) > 0
Ejemplo n.º 5
0
 def apply_move(self, move):
     """Play ``move`` on the state and re-root the tree accordingly.

     The first child of the current root whose ``move`` equals the given one
     becomes the new root, with its ``parent`` cleared.  If no child matches,
     the root is replaced by a new ``mcts.Node(None, move)``.
     """
     self.state.make_move(move)
     candidates = [child for child in self.root.children
                   if child.move == move]
     if candidates:
         self.root = candidates[0]
         self.root.parent = None
     else:
         self.root = mcts.Node(None, move)
Ejemplo n.º 6
0
def main():
    """Run the interactive game loop until the game is over.

    Each round runs ``mcts.mcts`` from the current root, renders the
    resulting state, reads an integer action from standard input, moves the
    root to the node for that action and renders again.
    """
    root = mcts.Node(initial_state(), 0, 0)

    while not is_game_over(root.state):
        root = mcts.mcts(root)
        render(root.state)

        input_action = int(input("Action: "))

        if input_action not in root.unvisited_actions:
            # Step into the child carrying this action.  If several children
            # match, the last one wins; if none match, the root is unchanged.
            matching = [child for child in root.children
                        if child.action == input_action]
            if matching:
                root = matching[-1]
        else:
            # The action has no child yet: build a node for the new state.
            successor = play(root.state, input_action)
            root = mcts.Node(successor, root.action, input_action)

        render(root.state)
Ejemplo n.º 7
0
    def __init__(self, game, time=None, iterations=None, c=2):
        """Set up the search limits, a fresh game state and an empty root.

        Args:
            game: Object providing ``State()``, called to create the state.
            time: Thinking budget, stored as ``millis_to_think``.  Defaults to
                1000 when neither ``time`` nor ``iterations`` is given.
            iterations: Iteration limit, stored as given.
            c: Stored as ``self.c``.
        """
        no_limit_given = time is None and iterations is None

        self.c = c
        self.millis_to_think = 1000 if no_limit_given else time
        self.iterations = iterations

        self.state = game.State()
        self.root = mcts.Node(None, None)
Ejemplo n.º 8
0
 def makeMove(self, node, move):
     """Step from ``node`` along ``move`` and make the result the search root.

     The existing child for ``move`` is reused when there is one; otherwise a
     new node is created with ``node`` as its first argument and the turn
     flipped.  The previous node's children are cleared and its search-root
     flag dropped, and the bit ``1 << move`` is passed to ``self.AddSite``.

     Returns:
         The node that is now the search root.
     """
     children = node.childNodes
     if move in children:
         node = children[move]
     else:
         node = mcts.Node(node, move, node.turn ^ 1)

     previous = node.parent
     node.isSearchRoot = True
     previous.childNodes.clear()
     previous.isSearchRoot = False

     self.AddSite(1 << move)
     return node
Ejemplo n.º 9
0
 def __init__(self):
     """Initialise the base object, the model and the first search root.

     Records the start time, calls the parent initialiser with a turn of 0
     and two 64-bit board constants, creates the model and the score/table
     bookkeeping, builds a root node flagged as both game root and search
     root, and creates the ``MCTSBatch`` runner.
     """
     self.start = time.time()
     super().__init__(0, 0x0000000810000000, 0x0000001008000000)

     self.model = Model()
     #self.model.load('model/Gen' + str(0))
     self.score = 0
     self.table = dict()

     root = mcts.Node(mcts.FakeNode(), 0, 0,
                      Core(self.turn, self.black, self.white))
     root.isGameRoot = True
     root.isSearchRoot = True
     self.currentNode = root

     self.mctsBatch = mcts.MCTSBatch(self.model, NUM_MCTS)
Ejemplo n.º 10
0
    def search(self, game, time_left):
        """Search from ``game`` until time runs low and return the move found.

        Args:
            game: Current game; ``copy(game)`` seeds the new tree.
            time_left: Zero-argument callable returning the remaining time,
                compared against ``self.TIMER_THRESHOLD``.

        Returns:
            ``last_move[0]`` of the game held by the node most recently
            selected with ``self.tree.next(temperature=2)``.
        """
        self.tree = mcts.Node(copy(game))
        self.time_left = time_left

        # Explore once before consulting the clock.  Checking first could
        # leave the loop without ever assigning ``self.tree_next`` for this
        # call, so the return below would either raise AttributeError or
        # report the move chosen by a previous search.
        while True:
            self.tree.explore(self.policy)
            self.tree_next, _ = self.tree.next(temperature=2)

            if self.time_left() < self.TIMER_THRESHOLD:
                break

        return self.tree_next.game.last_move[0]
Ejemplo n.º 11
0
 def makeMove(self, node, move):
     """Step from ``node`` along ``move`` and make the result the search root.

     The existing child for ``move`` is reused when there is one; otherwise a
     new node is created with ``node`` as its first argument and the turn
     flipped.  The previous node's children are cleared and its search-root
     flag dropped.  The bit ``1 << move`` is then checked against
     ``self.judge``: if set it is passed to ``self.AddSite``, otherwise a
     message is printed and the process exits.

     Returns:
         The node that is now the search root.
     """
     children = node.childNodes
     if move in children:
         node = children[move]
     else:
         node = mcts.Node(node, move, node.turn ^ 1)

     previous = node.parent
     node.isSearchRoot = True
     previous.childNodes.clear()
     previous.isSearchRoot = False

     site = 1 << move
     if not self.judge & site:
         # The requested square is not set in ``self.judge``: abort.
         print("いや打てへんやん")
         exit()
     else:
         self.AddSite(site)
     return node
Ejemplo n.º 12
0
 def InputEnemy(self):
     """Choose and play the opponent's move.

     ``nmTurn`` is 64 minus the two counts returned by ``self.Count()``.
     Below 16 the move comes from ``moveAI``, which also returns the new
     score and table.  Otherwise a new root node is built for the current
     position, ``self.mctsBatch.alpha`` is run on it, the resulting vector is
     printed and its arg-max index is played through ``self.makeMove``.
     """
     nb, nw = self.Count()
     nmTurn = 64 - nb - nw

     if nmTurn < 16:
         site, score, self.table = moveAI(self.black, self.white, self.turn,
                                          nmTurn, self.table, self.score)
         self.score = score
         self.AddSite(site)
         return

     root = mcts.Node(mcts.FakeNode(), 0, self.turn,
                      Core(self.turn, self.black, self.white))
     root.isGameRoot = True
     root.isSearchRoot = True
     self.currentNode = root

     pi = self.mctsBatch.alpha([self.currentNode], 1)[0]
     print(pi)
     best_move = int(np.argmax(pi))
     self.currentNode = self.makeMove(self.currentNode, best_move)
Ejemplo n.º 13
0
 def InputPlayer(self):
     """Choose and play a move for the player side.

     ``nmTurn`` is 64 minus the two counts returned by ``self.Count()``.
     Below ``RAND`` a new root node is built, ``self.mctsBatch.alpha`` is run
     on it, the result is recorded with ``self.addHistory`` and its arg-max
     index is played.  Otherwise random indices in ``[0, 64)`` are drawn
     until one whose bit is set in ``self.judge`` is found, and that is
     played.
     """
     nb, nw = self.Count()
     nmTurn = 64 - nb - nw

     if nmTurn >= RAND:
         # Keep drawing until the drawn bit is set in ``self.judge``.
         while True:
             move = random.randrange(0, 64)
             if self.judge & (1 << move):
                 self.currentNode = self.makeMove(self.currentNode, move)
                 return

     root = mcts.Node(mcts.FakeNode(), 0, self.turn,
                      Core(self.turn, self.black, self.white))
     root.isGameRoot = True
     root.isSearchRoot = True
     self.currentNode = root

     pi = self.mctsBatch.alpha([self.currentNode], 1)[0]
     move = int(np.argmax(pi))
     self.addHistory(pi, self.turn)
     self.currentNode = self.makeMove(self.currentNode, move)
Ejemplo n.º 14
0
def tst_example_children_finalcolum():  #final column check ok
    """Expand a tree breadth-first from a fixed 5x5 state and visualise it.

    Every visited node of type ``'R'`` whose ``remain_rooms`` does not
    contain 5 contributes a deep copy of its state; the first 48 collected
    states are handed to ``mcts.Visualisation`` and drawn with
    ``vis_static()``.
    """
    state = np.array([[1, 2, 3, 4, 0], [1, 2, 3, 0, 0], [0, 2, 3, 0, 0],
                      [0, 2, 0, 0, 0], [0, 0, 0, 0, 0]])
    rootnode = mcts.Node(state, [5, 6], None, ('R', None),
                         ((None, None), (None, None)))

    collected = []
    pending = [rootnode]
    while pending:
        node = pending.pop(0)  # first-in, first-out: breadth-first order
        if node.type == 'R' and 5 not in node.remain_rooms:
            collected.append(deepcopy(node.state))
        node.expand()
        if node.terminal is False:
            pending.extend(node.children)

    shown_states = collected[:48]
    room_ids = [1, 2, 3, 4, 5, 6]
    connections = np.ones((6, 6))
    vis = mcts.Visualisation(room_ids, shown_states, connections, 'None')
    vis.vis_static()
Ejemplo n.º 15
0
def real_game(modelname, time_limit, recommendation_count):
    """Interactively follow a real draft, printing MCTS recommendations.

    Asks for the game mode, the user's side and who has first pick/ban, then
    loops until the draft state is terminal.  On the user's turns it runs
    ``mcts_transpositions.uct_search`` and prints the best children; on every
    turn it reads the action actually taken via ``get_pick`` and advances the
    tree.  Finally it prints the predicted Radiant win probability for every
    model in ``util.all_models``.

    Args:
        modelname: Passed to ``util.load_model`` for the model used in search.
        time_limit: Passed through to ``uct_search`` as ``time_limit``.
        recommendation_count: Number of top children printed per user turn.

    Side effects:
        Rebinds the module-level ``util.pick_ban_order``; reads from and
        prints to the console.
    """
    mode = ask_question("What gamemode are you playing?", ["ap", "cm"])
    side = ask_question("Which side are you playing on?", ["radiant", "dire"])
    first = ask_question("Do you have first pick / ban?", ["y", "n"])

    # Select the pick/ban order for the chosen mode (global setting on util).
    if mode == "ap":
        util.pick_ban_order = util.allpick_order
    else:
        util.pick_ban_order = util.cm_order

    # Radiant moves first if the user is Radiant with first pick, or the user
    # is Dire without it.
    radiant_goes_first = (side == "radiant"
                          and first == "y") or (side == "dire"
                                                and first == "n")
    node = mcts_transpositions.Node(mcts.State(radiant_goes_first))
    # Transposition table carried from one search to the next.
    transpositions = dict()
    model = util.load_model(modelname)
    players_turn = (side == "radiant" and node.state.radiant_moves_next) or (
        side == "dire" and not node.state.radiant_moves_next)
    while not node.state.is_terminal():
        print_state(node.state)
        choices = node.state.get_actions()
        # Sets, so the entered choice is validated regardless of hero order.
        choices_sets = [set(i) for i in choices]

        (pick_ban, count) = util.pick_ban_order[node.state.pick_ban_position]
        subject = 'pick' if pick_ban == util.pick else 'ban'
        print("The next action is a", subject, "of", count, "heroes.")

        if players_turn:
            print("It is your turn. MCTS recommends the following heroes: ...")
            (_, root_node, transpositions) = mcts_transpositions.uct_search(
                model,
                initial_node=node,
                time_limit=time_limit,
                transpositions=transpositions)
            node = root_node

            # Statistics are read from the transposition entry for a node's
            # state rather than from the node itself.
            def to_transpo(n):
                return transpositions[mcts_transpositions.state_to_key(
                    n.state)]

            # Rank children by mean simulated reward, best first.
            # NOTE(review): this divides by visit_count; a child whose entry
            # has visit_count == 0 would raise ZeroDivisionError - confirm
            # uct_search always visits every child at least once.
            children = sorted(
                root_node.children,
                key=lambda n: to_transpo(
                    n).total_simulated_reward / to_transpo(n).visit_count,
                reverse=True)
            # Each line printed: hero names, mean reward, visit count.
            for c in children[:recommendation_count]:
                print([
                    util.simple_heroes.ordered_to_name(i)
                    for i in c.incoming_action
                ],
                      to_transpo(c).total_simulated_reward /
                      to_transpo(c).visit_count,
                      to_transpo(c).visit_count)
        else:
            print("It is the other team's turn. What did they do?")
        # The turn flag flips after every action, independent of `count`.
        players_turn = not players_turn
        choice = get_pick(node.state, pick_ban, count)

        print()
        # NOTE(review): input validation via assert is stripped under -O.
        assert (set(choice) in choices_sets)
        # Reuse the existing child for this action if there is one, detaching
        # it from its parent so it becomes the new root.  The loop has no
        # break, so the comparison runs for every child of the old node.
        found = False
        for n in node.children:
            if n.incoming_action == choice:
                node = n
                node.parent = None
                node.incoming_action = None
                found = True
        if not found:
            # NOTE(review): the initial root is mcts_transpositions.Node but
            # this creates mcts.Node, and the result can later be passed to
            # mcts_transpositions.uct_search - confirm the two are compatible.
            node = mcts.Node(node.state.get_next_state(choice))
    print('Done!')
    print_state(node.state)
    print('Predicting Radiant win probability with all models:')
    # `model` is rebound here; the search model is no longer needed.
    for model_name in util.all_models:
        model = util.load_model(model_name)
        print(
            model_name, ':',
            util.predict_radiant_win_probability(
                util.state_to_feature(node.state), model))
Ejemplo n.º 16
0
    def find_move(self,
                  board,
                  min_kldiv=0,
                  max_rolls=0,
                  max_time=0,
                  pvs=0,
                  temperature=False,
                  use_mcts=True):
        """Search from ``board`` and advance ``self.node`` to the chosen child.

        Rollouts continue until one of the enabled limits is hit or
        ``self.should_stop`` becomes true.  A limit of 0 disables that limit;
        with all limits disabled the search only ends via ``should_stop``.

        Args:
            board: Position to search from.
            min_kldiv: Stop once the value returned by ``print_stats`` falls
                below this; checked every ``STAT_INTERVAL`` rollouts.
            max_rolls: Stop after this many rollouts.
            max_time: Stop after this many seconds (it is added to
                ``time.time()``).
            pvs: Passed through to ``print_stats``.
            temperature: Falsy to pick the most visited child; otherwise a
                non-zero number used as the sampling temperature.
            use_mcts: When false the rollout loop is skipped and, if a
                temperature is given, the child is sampled from the priors.

        Returns:
            ``(node, stats)``: the selected child (now ``self.node``) and a
            ``Stats(kl_div, rolls, elapsed_seconds)`` record.
        """
        # We try to reuse the previous node, but if we can't, we create a new one.
        if self.node:
            # Check if the board is at one of our children (cheap pondering)
            for node in self.node.children:
                if node.board == board:
                    self.node = node
                    if self.args.debug:
                        print('info string Reusing node from ponder.')
                    break

        # If we weren't able to find the board, make a new node.
        # Note the node.children check: If the node is a reused node and
        # at a repeated position, it will think the game is over, but we
        # still want it to continue playing.
        if not self.node or self.node.board != board or not self.node.children:
            vec = self.args.model.from_scratch(board)
            self.node = mcts.Node(board, vec, None, 0, self.args)
            if self.args.debug:
                print('info string Creating new root node.')

        # Print priors for new root node.
        while self.node.N < 2:
            # Ensure children are expanded
            self.node.rollout()
        nodes = sorted(self.node.children, key=lambda n: n.P, reverse=True)[:7]
        print('info string priors',
              ', '.join(f'{board.san(n.move)} {n.P:.1%}' for n in nodes))

        # Find move to play
        self.should_stop = False
        kl_div = 1
        rolls = 0
        start_time = time.time()
        if use_mcts:
            first = True
            for i in itertools.count():
                rolls += 1
                self.node.rollout()
                if self.should_stop or \
                        max_time > 0 and time.time() > start_time + max_time or \
                        max_rolls > 0 and rolls >= max_rolls:
                    break
                if (i + 1) % STAT_INTERVAL == 0:
                    kl_div = self.print_stats(first, pvs)
                    if min_kldiv > 0 and kl_div < min_kldiv:
                        break
                    first = False

        # Pick best or random child
        if temperature:
            if use_mcts:
                counts = [(n.N / self.node.N)**(1 / temperature)
                          for n in self.node.children]
            else:
                counts = [n.P**(1 / temperature) for n in self.node.children]
            node = random.choices(self.node.children, weights=counts)[0]
            if self.args.debug:
                # 1-based rank of the sampled child when ordered by visits.
                rank = sorted(self.node.children,
                              key=lambda n: -n.N).index(node) + 1
                # English ordinal suffix.  The tens digit must come from
                # integer division: the previous true division (`/`) let
                # 11, 12 and 13 through as "11st", "12nd" and "13rd".
                if rank // 10 % 10 == 1:
                    suffix = 'th'
                else:
                    suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')
                # The ordinal used to be computed and then dropped; report it
                # like the other debug messages.
                print(f'info string Chose {rank}{suffix} child. '
                      f'(temp={temperature})')
            self.node = node
        else:
            self.node = max(self.node.children, key=lambda n: n.N)

        stats = Stats(kl_div, rolls, time.time() - start_time)
        return self.node, stats
Ejemplo n.º 17
0
 def reset(self):
     """Discard the current game and tree: new state, new empty root."""
     fresh_state = game.State()
     self.state = fresh_state
     self.root = mcts.Node(None, None)
Ejemplo n.º 18
0
    def InputEnemy(self):
        """Play the opponent's move and record training history.

        ``nmTurn`` is 64 minus the two counts from ``self.Count()``
        (presumably the number of empty squares - confirm).  Three regimes:

        * ``nmTurn < 16``: moves come from ``moveAITrain``; the resulting
          scores are turned into a policy vector and a sign-clamped value,
          and some candidate moves are additionally tried one ply deep to
          produce extra history entries.
        * ``nmTurn < RAND``: a new root is built and the move is the arg-max
          of ``self.mctsBatch.alpha``.
        * otherwise: a random index whose bit is set in ``self.judge``.
        """
        nb, nw = self.Count()
        nmTurn = 64 - nb - nw
        if nmTurn < 16:
            site, maxScore, sumScore, self.table = moveAITrain(
                self.black, self.white, self.turn, nmTurn, self.table, nw - nb)
            # Reduce the score to its sign: 1, -1, or unchanged when zero.
            if maxScore > 0:
                maxScore = 1
            elif maxScore < 0:
                maxScore = -1
            # `site` holds (score, move-bit) pairs, as indexed below; highest first.
            site.sort(reverse=True)
            # 64-entry policy: each move's score divided by sumScore
            # (or by 1 when sumScore is zero, to avoid dividing by zero).
            pi = np.zeros(64)
            for move in site:
                pi[bitFind(move[1])] = move[0] / (sumScore if sumScore else 1)
            self.addHistory(pi, self.turn)
            # Snapshot the history before addValue is applied to it.
            temp = copy.deepcopy(self.history)
            self.addValue(self.history, maxScore)
            # Try the first len(site) // 4 + 1 candidates one ply deep.
            # NOTE(review): if `site` can be empty, site[0] raises IndexError.
            for i in range(len(site) // 4 + 1):
                history2 = copy.deepcopy(temp)
                self.AddSite(site[i][1])
                # NextBoard() is called before and after the reply search;
                # presumably apply-then-undo of the candidate - confirm.
                self.NextBoard()
                site2, maxScore2, sumScore2, self.table = moveAITrain(
                    self.black, self.white, self.turn ^ 1, nmTurn - 1,
                    self.table, nw - nb)
                self.NextBoard()
                # The reply score is negated, then sign-clamped like maxScore.
                maxScore2 = -maxScore2
                if maxScore2 > 0:
                    maxScore2 = 1
                elif maxScore2 < 0:
                    maxScore2 = -1

                if site2:
                    site2.sort(reverse=True)
                    pi2 = np.zeros([64])
                    for move2 in site2:
                        pi2[bitFind(move2[1])] = move2[0] / (sumScore2 if
                                                             sumScore2 else 1)
                    features = []
                    features.append(self.black)
                    features.append(self.white)
                    features.append(self.judge)
                    history2.append([features, pi2, self.turn ^ 1])
                    self.addValue(history2, maxScore2)
                    # NOTE(review): history2 began as a copy of the whole
                    # snapshot, so extend() appends those earlier entries
                    # again along with the new one - confirm intended.
                    self.history.extend(history2)
            # NOTE(review): AddSite is called with 0 once the candidates have
            # been tried; its meaning is not visible here - confirm.
            self.AddSite(0)

        elif nmTurn < RAND:
            # Fresh root for the current position, flagged as both roots.
            self.currentNode = mcts.Node(
                mcts.FakeNode(), 0, self.turn,
                Core(self.turn, self.black, self.white))
            self.currentNode.isGameRoot = True
            self.currentNode.isSearchRoot = True
            pi = self.mctsBatch.alpha([self.currentNode], 1)[0]
            move = int(np.argmax(pi))
            self.addHistory(pi, self.turn)
            self.currentNode = self.makeMove(self.currentNode, move)

        else:
            # Draw random indices until one whose bit is set in self.judge.
            while True:
                move = random.randrange(0, 64)
                site = 1 << move
                if self.judge & site:
                    # self.AddSite(site)
                    self.currentNode = self.makeMove(self.currentNode, move)
                    break
Ejemplo n.º 19
0
 def buildMCTS(self, state):
     """Create a new search tree rooted at ``state`` for this agent.

     Logs the rebuild, stores the new root node as ``self.root`` and wraps it
     in a new ``mcts.MCTS`` built with ``self.cpuct``.
     """
     lg.logger_mcts.info(
         '****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
     new_root = mcts.Node(state)
     self.root = new_root
     self.mcts = mcts.MCTS(new_root, self.cpuct)