Example 1
    def _go_to_child(self, parent, child_index):
        if debug:
            print "go_to @", self._stack_string()
        move = naf.policy_index_point(self.game.turn, child_index)
        subnode = parent.subnodes[child_index]
        self.game.play(move)
        parent.Ns[child_index] += 1

        if subnode:
            gc_index = self._pick_expand_index(subnode)
            if gc_index == "wait":
                # Evaluation still pending: roll back the speculative visit.
                parent.Ns[child_index] -= 1
                r = 0
            elif gc_index == "lose":
                # Every reply from this node is a proven loss.
                subnode.proven = True
                subnode.winning_index = None
                subnode.score = -1
                self.finished_leaves.append(subnode)
                r = 1
            else:
                assert gc_index >= 0
                r = self._go_to_child(subnode, gc_index)
        else:
            parent.subnodes[child_index] = self._expand_leaf(
                parent, child_index)
            r = 1

        self.game.undo()
        return r
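
Every example on this page calls naf.policy_index_point to map a flat policy index back to a board point. A minimal sketch of what such a mapping could look like, assuming a SIZE x SIZE board with row-major indexing (hypothetical; the real naf version takes the side to move or the game as its first argument and may well differ):

import collections

Point = collections.namedtuple("Point", ["x", "y"])
SIZE = 24  # assumed board size; the real constant lives in the naf module

def policy_index_point_sketch(index, size=SIZE):
    # Row-major: consecutive indices walk along one row of the board.
    return Point(x=index % size, y=index // size)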
Example 2
    def pick_move(self, game):
        if self.use_swap and len(game.history) < 2:
            if len(game.history) == 0:
                self.report = "swapmodel"
                return swapmodel.choose_first_move()
            elif swapmodel.want_swap(game.history[0]):
                self.report = "swapmodel"
                return "swap"
            # else didn't want swap so compute a regular move

        N = self.nm.mcts(game, self.num_trials)
        self.report = self.nm.report

        # When a forcing win or forcing draw move is found, no policy
        # array is returned; the move itself comes back.
        if isinstance(N, (str, twixt.Point)):
            return N

        if self.temperature == 0.0:
            mx = N.max()
            weights = numpy.where(N == mx, 1.0, 0.0)
        elif self.temperature == 1.0:
            weights = N
        elif self.temperature == 0.5:
            weights = N**2
        else:
            # General rule: weights proportional to N ** (1 / temperature).
            weights = N ** (1.0 / self.temperature)
        if self.verbosity >= 2:
            print "weights=", weights
        index = numpy.random.choice(numpy.arange(len(weights)),
                                    p=weights / weights.sum())

        return naf.policy_index_point(game, index), N
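
The temperature branches above are all special cases of the general rule that sampling weights are proportional to N ** (1 / T). A self-contained sketch under that assumption (visit_weights is a hypothetical helper, not part of this codebase):

import numpy

def visit_weights(N, temperature):
    if temperature == 0.0:
        # Greedy: all mass on the most-visited moves.
        return numpy.where(N == N.max(), 1.0, 0.0)
    # General case, covering the T == 1.0 and T == 0.5 branches above.
    return N.astype(float) ** (1.0 / temperature)

w = visit_weights(numpy.array([10, 5, 1]), 0.5)
p = w / w.sum()  # normalized distribution for numpy.random.choice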
Example 3
    def _top_moves_str(self):
        indices = numpy.argsort(self.root.P[self.root.LMnz])
        pts = [
            str(naf.policy_index_point(self.game, self.root.LMnz[0][index]))
            for index in indices[-3:]
        ]
        return ":" + ",".join(pts)
Example 4
def show_game_with_p(game, P):
    fig, ax = plt.subplots()
    D = numpy.zeros((SIZE, SIZE))

    for i, p in enumerate(P):
        point = naf.policy_index_point(game, i)
        print point, p
        D[point.y, point.x] = p  # imshow uses (row, col) = (y, x)

    ax.imshow(D)

    game_to_axes(game, ax)
    plt.show()
Example 5
    def _node_to_pv(self, node, color):
        descend_index = None
        if node.winning_index is not None:
            descend_index = node.winning_index
        else:
            nmax = node.Nf.max()
            if nmax < 2:
                return []
            # Among the most-visited children, follow the one with best Q.
            Nfnz = (node.Nf == nmax).nonzero()
            i1 = node.Q[Nfnz].argmax()
            descend_index = Nfnz[0][i1]

        move = naf.policy_index_point(color, descend_index)
        sub = self._node_to_pv(node.subnodes[descend_index], 1 - color)
        sub.insert(0, str(move))
        return sub
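
The descent rule above follows the most-visited child and breaks ties on Q. The same indexing idiom in isolation, with toy numbers:

import numpy

Nf = numpy.array([3, 7, 7, 1])
Q = numpy.array([0.1, -0.2, 0.4, 0.0])

Nfnz = (Nf == Nf.max()).nonzero()  # children sharing the max visit count
best = Nfnz[0][Q[Nfnz].argmax()]   # tie broken on Q; here best == 2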
Example 6
    def mcts(self, game, trials):
        """ Using the neural net, compute the move visit count vector """
        self.compute_root(game)
        if self.root is None:
            self.root = self.expand_leaf(game)
            self.history_at_root = list(game.history)
            if self.verbosity >= 1:
                top_ixs = numpy.argsort(self.root.P)[-5:]
                print "eval=%.3f  %s" % (self.root.score, " ".join([
                    "%s:%d" % (naf.policy_index_point(
                        game, ix), int(self.root.P[ix] * 10000 + 0.5))
                    for ix in top_ixs
                ]))

        if not self.root.proven:
            for i in range(trials):
                assert not self.root.proven

                self.visit_node(game, self.root, True, trials - i)
                if self.root.proven:
                    break

        if self.root.proven:
            if self.root.winning_move:
                self.report = "fwin" + self.top_moves_str(game)
                return self.root.winning_move
            elif self.root.drawing_move:
                self.report = "fdraw" + self.top_moves_str(game)
                return self.root.drawing_move
            else:
                self.report = "flose" + self.top_moves_str(game)
                return "resign"

        if self.verbosity >= 2:
            print "N=", self.root.N
            print "Q=", self.root.Q

        self.report = "%6.3f" % (self.root.Q[numpy.argmax(
            self.root.N)]) + self.top_moves_str(game)
        return self.root.N
Example 7
                    type=str,
                    nargs='+',
                    help='a file with selfplay binary logs')
args = parser.parse_args()

for p in args.positions:
    colon = p.find(':')
    if colon == -1:
        raise ValueError("position must be given as <file>:<index>")
    filename = p[:colon]
    index = int(p[colon + 1:])

    with open(filename, "rb") as f:
        f.seek(index * naf.LearningState.NUM_BYTES)
        b = f.read(naf.LearningState.NUM_BYTES)
    ts = naf.LearningState.from_bytes(b)

    print "evaluation: %d" % (ts.z),
    for i in range(len(ts.N)):
        if i % 8 == 0:
            print
        print "%3s:%-4d" % (naf.policy_index_point(twixt.Game.WHITE,
                                                   i), ts.N[i]),
    print

    if args.display:
        import ui
        tb = ui.TwixtBoardWindow(p)
        tb.set_naf(ts.naf)
        tb.win.getMouse()
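
Because the selfplay log is a sequence of fixed-size records, the k-th record can be read with a single seek. A generic sketch of that access pattern (read_record is a hypothetical helper):

def read_record(filename, index, record_bytes):
    # Fixed-size records: the k-th one starts at k * record_bytes.
    with open(filename, "rb") as f:
        f.seek(index * record_bytes)
        return f.read(record_bytes)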
Example 8
    def pick_move(self, game):
        if len(game.history) == 0:
            self.report = "swapmodel"
            return swapmodel.choose_first_move()
        elif len(game.history) == 1 and self.use_swap:
            if swapmodel.want_swap(game.history[0]):
                self.report = "swapmodel"
                return "swap"

        # If we get here, it is move 2+ and either we cannot or do not swap.
        self.game = game
        self._preload_root()
        self.finished_leaves = []
        self.leaves_waiting = 0
        self.num_evals = 0

        if self.root is None:
            self.root = self._expand_leaf(None, None)
            self.history_at_root = list(self.game.history)
            self._block_until_reply()
            self.num_evals = 1

        # Let's start thinkin'!
        request_block = False
        while not self.root.proven and self.num_evals < self.trials:
            self._process_incoming_data(request_block)
            request_block = False
            self._process_finished_leaves()

            if self.root.proven:
                break

            ei = self._pick_root_expand_index(self.trials - self.num_evals)
            if ei >= 0:
                r = self._go_to_child(self.root, ei)
                if r == 0:
                    assert self.leaves_waiting > 0
                    request_block = True
            elif self.leaves_waiting == 0:
                # no moves worth evaluating; means we lost.
                self.root.proven = True
            else:
                request_block = True

        # Okay, done thinkin'.  Flush out any leaves still waiting.
        while self.leaves_waiting > 0:
            self._block_until_reply()
        self._process_finished_leaves()

        if self.root.proven:
            if self.root.winning_index is not None:
                self.report = "fwin"
                return naf.policy_index_point(game, self.root.winning_index)
            else:
                self.report = "flose"
                return "resign"

        assert numpy.array_equal(self.root.Nf,
                                 self.root.Ns), (self.root.Nf.sum(),
                                                 self.root.Ns.sum())
        good_moves = numpy.where(self.root.Nf == self.root.Nf.max())
        index = random.choice(good_moves[0])
        move = naf.policy_index_point(game, index)

        self.report = "%6.3f %s" % (self.root.Q[index],
                                    self._principal_var_str())
        return move
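
The final move is sampled uniformly among the most-visited children. The numpy.where / random.choice pattern in isolation, with toy numbers:

import random
import numpy

Nf = numpy.array([4, 9, 2, 9])
good_moves = numpy.where(Nf == Nf.max())  # tuple holding an index array
index = random.choice(good_moves[0])      # uniform tie-break: 1 or 3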
Example 9
    nips.rotate(ROT)
    pw, ml = ne.eval_one(nips)
    ml = naf.rotate_policy_array(ml[0, :], ROT)
    print "pwin=", pw

    LM = naf.legal_move_policy_array(game)
    LMnz = LM.nonzero()
    max_ml = ml[LMnz].max()
    el = numpy.exp(ml - max_ml)
    divisor = el[LMnz].sum()
    P = el / divisor
    # print P
    P[(1-LM).nonzero()] = 0
    inds = numpy.argsort(P)
    for idx, i in enumerate(reversed(inds[-40:])):
        coord = naf.policy_index_point(game, i)
        possible_moves.append(coord)
        if idx < 10:
            print "%3s %6.2f" % (str(coord), P[i] * 100.0)

    #import mpui
    #mpui.show_game_with_p(game, P)

if args.thinker:
    thinker = twixt.get_thinker(args.thinker, resources)
    tup = thinker.pick_move(game)
    if isinstance(tup, tuple):
        m, n = tup
    else:
        m = tup
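
The exp / normalize / zero-out sequence above is a softmax restricted to the legal moves. The same computation as a stand-alone sketch (masked_softmax is a hypothetical name):

import numpy

def masked_softmax(logits, mask):
    # Softmax over entries where mask == 1; illegal entries end up at 0.
    nz = mask.nonzero()
    e = numpy.exp(logits - logits[nz].max())  # subtract max for stability
    p = e / e[nz].sum()
    p[(1 - mask).nonzero()] = 0
    return p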
Example 10
    def visit_node(self, game, node, top=False, trials_left=-1):
        """ Visit a node, return the evaluation from the
	    point of view of the player currently to play. """

        assert not game.just_won()
        if not node.LM.any():
            # No legal moves left: this node's value is proven.
            node.proven = True
            if node.drawing_move:
                node.score = 0
            else:
                # all moves lose.  very sad.
                node.score = -1
            return node.score

        if top and self.smart_root:
            maxn = node.N[node.LMnz].max()
            winnables = (node.N > maxn - trials_left) & node.LM
            num_winnables = numpy.count_nonzero(winnables)
            assert num_winnables > 0, (maxn, trials_left,
                                       numpy.array_str(node.N),
                                       numpy.array_str(node.LM))
            if num_winnables == 1:
                index = winnables.argmax()
            else:
                maxes = (node.N == maxn)
                num_maxes = numpy.count_nonzero(maxes)
                assert num_maxes > 0
                if num_maxes == 1:
                    # A lone leader gains nothing from more visits; spend
                    # the remaining trials on its challengers instead.
                    winnables[maxes.argmax()] = 0

                nsum = node.N.sum()  # no mask needed: illegal entries stay 0
                stv = math.sqrt(nsum + 1.0)
                U = node.Q + node.P * stv / (1.0 + node.N)
                wnz = numpy.nonzero(winnables)
                nz_index = U[wnz].argmax()
                index = wnz[0][nz_index]
        else:
            # At least one node worth visiting.  Figure out which one to visit...
            nsum = node.N.sum()  # no mask needed: illegal entries stay 0
            stv = math.sqrt(nsum + 1.0)
            U = node.Q + node.P * stv / (1.0 + node.N)
            nz_index = U[node.LMnz].argmax()
            index = node.LMnz[0][nz_index]

        move = naf.policy_index_point(game.turn, index)

        if top and self.verbosity >= 3:
            # Dump per-move stats (the loop's apparent debugging intent).
            for i in range(len(node.N)):
                m = naf.policy_index_point(game.turn, i)
                print "%3s Q=%.3f P=%.5f N=%d" % (
                    str(m), node.Q[i], node.P[i], node.N[i])

        if top and self.verbosity > 1:
            print "selecting index=%d move=%s Q=%.3f P=%.5f N=%d" % (
                index, str(move), node.Q[index], node.P[index], node.N[index])

        subnode = node.subnodes[index]

        game.play(move)
        if subnode:
            subscore = -self.visit_node(game, subnode)
        else:
            subnode = self.expand_leaf(game)
            node.subnodes[index] = subnode
            subscore = -subnode.score
        game.undo()

        node.N[index] += 1
        if subnode.proven:
            node.Q[index] = subscore
            node.LM[index] = 0
            node.LMnz = node.LM.nonzero()
            if subscore == 1:
                node.proven = True
                node.winning_move = move
            elif subscore == 0:
                node.drawing_move = move
        else:
            # Running-mean update of Q with the new evaluation.
            node.Q[index] += (subscore - node.Q[index]) / node.N[index]

        return subscore
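
The selection score U = Q + P * sqrt(sum(N) + 1) / (1 + N) computed in both branches is a PUCT-style rule: the prior P scales an exploration bonus that shrinks as a move accumulates visits. The formula in isolation (puct_scores is a hypothetical name; the exploration constant here is implicitly 1):

import math
import numpy

def puct_scores(Q, P, N):
    # High prior or low visit count raises the exploration bonus.
    stv = math.sqrt(N.sum() + 1.0)
    return Q + P * stv / (1.0 + N)

U = puct_scores(numpy.array([0.0, 0.2]),
                numpy.array([0.7, 0.3]),
                numpy.array([4.0, 1.0]))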