コード例 #1
0
    def create_response(self,
                        game,
                        status,
                        num_trials=0,
                        current_trials=0,
                        proven=False,
                        moves=None,
                        P=None):
        """Build the response dictionary describing the current search state.

        Args:
            game: Game object; only ``game.turn`` is read, and only when
                ``moves`` is empty or ``None``.
            status: Value stored under ``"status"``.
            num_trials: Value stored under ``"max"``.
            current_trials: Value stored under ``"current"``.
            proven: Value stored under ``"proven"``.
            moves: Explicit move list to report. When falsy, the moves are
                derived from the visit counts of ``self.root`` instead.
            P: Optional values to report under ``"P"``. A ``list`` (or any
                subclass of it) is stored as-is; anything else must provide
                ``tolist()``.

        Returns:
            dict with the keys ``"status"``, ``"current"``, ``"max"``,
            ``"proven"`` and ``"moves"``; ``"P"`` when available; and ``"Y"``
            (visit counts) when the moves were derived from ``self.root``.
        """
        resp = {
            "status": status,
            "current": current_trials,
            "max": num_trials,
            "proven": proven
        }

        if P is not None:
            # isinstance() also accepts list subclasses, which do not have
            # a tolist() method and would otherwise raise AttributeError.
            resp["P"] = P if isinstance(P, list) else P.tolist()

        if moves:
            resp["moves"] = moves
            return resp

        # No explicit moves: report the entries of the root with the highest
        # visit counts, most visited first, capped at twixt.MAXBEST.
        indices = numpy.argsort(self.root.N)[::-1][:twixt.MAXBEST]
        resp["moves"] = [
            naf.policy_index_point(game.turn, i) for i in indices
        ]
        resp["Y"] = [int(n) for n in self.root.N[indices].tolist()]
        # Any "P" supplied by the caller is replaced here by the root priors
        # of the selected moves, scaled by 1000 and rounded to integers.
        resp["P"] = [
            int(round(p * 1000)) for p in self.root.P[indices].tolist()
        ]

        return resp
コード例 #2
0
 def top_moves_str(self, game):
     """Format up to three candidate moves of the root as ``":a,b,c"``.

     The priors ``self.root.P`` are restricted to the entries selected by
     ``self.root.LMnz`` and sorted in ascending order; the last three
     positions of that ordering are converted to points with
     ``naf.policy_index_point`` and joined with commas, so the entry with
     the largest prior comes last.

     Args:
         game: Passed through unchanged to ``naf.policy_index_point``.

     Returns:
         A string starting with ``":"`` followed by the comma-separated
         string forms of the selected points.
     """
     root = self.root
     candidates = root.LMnz
     ranking = numpy.argsort(root.P[candidates])

     labels = []
     for pos in ranking[-3:]:
         # candidates[0] maps a position in the filtered array back to the
         # index expected by policy_index_point.
         point = naf.policy_index_point(game, candidates[0][pos])
         labels.append(str(point))

     return ":" + ",".join(labels)
コード例 #3
0
    def eval_game(self, game, maxbest=twixt.MAXBEST):
        """Evaluate ``game`` once and report its best moves by prior.

        Args:
            game: Position to evaluate; passed to ``compute_root``,
                ``expand_leaf`` and ``naf.policy_index_point``.
            maxbest: How many of the highest-prior entries to report.

        Returns:
            A tuple ``(score, moves, P)``: ``score`` is ``self.root.score``,
            ``moves`` are the selected points ordered from highest prior to
            lowest, and ``P`` holds the matching priors multiplied by 1000
            and rounded to integers, in the same order.
        """
        self.compute_root(game)
        # After compute_root runs, self.root is unconditionally replaced by
        # a freshly expanded leaf for this position.
        self.root = self.expand_leaf(game)
        priors = self.root.P

        # Ascending argsort: the tail holds the largest priors.
        top_ixs = numpy.argsort(priors)[-maxbest:]

        best_first = []
        for ix in top_ixs:
            best_first.append(naf.policy_index_point(game, ix))
        best_first.reverse()

        permille = [int(round(priors[ix] * 1000)) for ix in top_ixs]
        permille.reverse()

        self.logger.debug("moves: %s, idx: %s", best_first, top_ixs)
        return self.root.score, best_first, permille
コード例 #4
0
    def traverse(self, game, path, level, node):
        """Follow the most-visited moves from ``node``, recording each step.

        At every level the entry of ``node.N`` with the highest count is
        chosen. If that count is positive, the move is played on ``game``, a
        move object is created on ``self.board`` and the last entry of
        ``self.board.history`` is appended to ``path``; the walk then
        continues into the corresponding subnode when one exists. On the way
        back the move is undone on ``game`` and popped from
        ``self.board.history``.

        Args:
            game: Game that is temporarily advanced and restored.
            path: List that receives one board-history entry per step.
            level: Current depth; only forwarded (incremented) to the
                recursive call.
            node: Search node providing ``N`` and ``subnodes``.
        """
        best = numpy.argmax(node.N)
        visits = node.N[best]
        if not visits > 0:
            # Nothing below this node has been visited.
            return

        child = node.subnodes[best]
        game.play(naf.policy_index_point(game.turn % 2, best))

        self.board.create_move_objects(len(game.history) - 1, visits)
        path.append(self.board.history[-1])

        if child is not None:
            self.traverse(game, path, level + 1, child)

        # Restore both the game and the board to their state on entry.
        game.undo()
        self.board.history.pop()
コード例 #5
0
    def visit_node(self, game, node, top=False, trials=None):
        """ Visit a node, return the evaluation from the
            point of view of the player currently to play.

            One move is selected at ``node``, played on ``game``, evaluated
            (by recursing into the existing subnode or by expanding a new
            leaf), undone again, and its result is folded into the node's
            ``N``/``Q`` statistics.

            Args:
                game: Game positioned at ``node``; it is advanced by one move
                    and restored before returning.
                node: Search node providing ``N``, ``Q``, ``P``, ``LM``,
                    ``LMnz``, ``subnodes`` and ``drawing_move``.
                top: When true, the selection is logged and, if
                    ``self.smart_root`` is set, the restricted root
                    selection below is used.
                trials: Subtracted from the highest visit count in the
                    restricted root selection.
                    NOTE(review): the default ``None`` would raise in
                    ``maxn - trials`` when that branch runs - confirm that
                    callers always pass a number together with ``top=True``.

            Returns:
                The negated score of the visited child, or, when ``node.LM``
                has no entry left, 0 if ``node.drawing_move`` is truthy and
                -1 otherwise.
        """

        assert not game.just_won()
        # No entry of node.LM is set any more: nothing left to select here.
        if not node.LM.any():
            # NOTE(review): the flag and score are written to self, whereas
            # the backup code at the end of this method reads
            # subnode.proven - confirm whether node.proven / node.score was
            # intended here.
            self.proven = True
            if node.drawing_move:
                self.score = 0
            else:
                # all moves lose.  very sad.
                self.score = -1
            return self.score

        if top and self.smart_root:
            # Visit counts of the moves still selectable at this node.
            vnz = node.N[node.LMnz]
            maxn = vnz.max()
            # Selectable moves whose visit count is within `trials` of the
            # current maximum (presumably `trials` is the number of trials
            # still to run - TODO confirm against the caller).
            winnables = (node.N > maxn - trials) & node.LM
            num_winnables = numpy.count_nonzero(winnables)
            assert num_winnables > 0, (maxn, trials, numpy.array_str(node.N),
                                       numpy.array_str(node.LM))
            if num_winnables == 1:
                # Only one candidate remains; take it directly.
                index = winnables.argmax()
            else:
                maxes = (node.N == maxn)
                num_maxes = numpy.count_nonzero(maxes)
                assert num_maxes > 0
                # vnz[argsort(vnz)[-2:]][0] is the second-largest visit count
                # among the selectable moves (or the largest if there is
                # only one such move).
                if (num_maxes == 1
                        and maxn - vnz[numpy.argsort(vnz)[-2:]][0] > 1):
                    # visit diff to second best is >1
                    # Exclude the single leader from the candidates.
                    winnables[maxes.argmax()] = 0

                nsum = node.N.sum()  # don't need to filter since all are 0
                stv = math.sqrt(nsum + 1.0)
                # Selection score: value estimate plus a prior-weighted bonus
                # that shrinks as a move accumulates visits.
                U = node.Q + self.cpuct * node.P * stv / (1.0 + node.N)

                # Pick the best-scoring move among the remaining candidates
                # and map it back to its index in the full arrays.
                wnz = numpy.nonzero(winnables)
                nz_index = U[wnz].argmax()
                index = wnz[0][nz_index]
        else:
            # At least one node worth visiting.  Figure out which one to
            # visit...
            nsum = node.N.sum()  # don't need to filter since all are 0
            stv = math.sqrt(nsum + 1.0)
            # Same selection score as in the restricted root selection.
            U = node.Q + self.cpuct * node.P * stv / (1.0 + node.N)

            # Best-scoring entry among those selected by node.LMnz, mapped
            # back to its index in the full arrays.
            nz_index = U[node.LMnz].argmax()
            index = node.LMnz[0][nz_index]

        move = naf.policy_index_point(game.turn, index)

        if top:
            self.logger.debug("selecting index=%d move=%s Q=%.3f P=%.5f N=%d",
                              index, str(move), node.Q[index], node.P[index],
                              node.N[index])

        subnode = node.subnodes[index]

        game.play(move)

        # if self.visualize_mcts:
        #    self.board.create_move_objects(len(game.history)-1, True)

        if subnode:
            # Existing child: recurse. The child's result is negated to get
            # the value for this node.
            subscore = -self.visit_node(game, subnode)
        else:
            # First visit: expand a new leaf, attach it, and use its score.
            subnode = self.expand_leaf(game)
            node.subnodes[index] = subnode
            subscore = -subnode.score

        # if self.visualize_mcts:
        #    self.board.undo_last_move_objects()

        game.undo()

        node.N[index] += 1
        if subnode.proven:
            # The child's result is final: store it as the exact value and
            # remove the move from the selectable set.
            node.Q[index] = subscore
            node.LM[index] = 0
            node.LMnz = node.LM.nonzero()
            if subscore == 1:
                # A score of 1 for this move marks the node itself as proven.
                node.proven = True
                node.winning_move = move
            elif subscore == 0:
                # Remember a move with score 0; it is consulted when no
                # selectable move is left (see the start of this method).
                node.drawing_move = move
        else:
            # Incremental running mean of the observed scores for this move.
            node.Q[index] += (subscore - node.Q[index]) / node.N[index]

        return subscore