def create_response(self, game, status, num_trials=0, current_trials=0, proven=False, moves=None, P=None):
    """Build a JSON-serializable response dict describing search progress/results.

    Args:
        game: game object; only ``game.turn`` is read, and only when ``moves``
            is falsy (to decode policy indices into points).
        status: status string passed through to the client.
        num_trials: total trial budget, reported as ``"max"``.
        current_trials: trials completed so far, reported as ``"current"``.
        proven: whether the result is proven, passed through.
        moves: optional explicit move list; when falsy, the root's
            most-visited moves are reported instead.
        P: optional policy values (numpy array or plain list).

    Returns:
        dict with keys "status", "current", "max", "proven", "moves" and
        optionally "P" / "Y".
    """
    resp = {
        "status": status,
        "current": current_trials,
        "max": num_trials,
        "proven": proven
    }
    if P is not None:
        # Accept either a plain list or a numpy array (fix: isinstance
        # instead of `type(P) != list`, which rejects list subclasses).
        resp["P"] = P if isinstance(P, list) else P.tolist()
    if not moves:
        # No explicit moves given: report the root's most-visited moves,
        # their visit counts ("Y") and priors scaled to integer per-mille.
        indices = numpy.argsort(self.root.N)[::-1][:twixt.MAXBEST]
        resp["moves"] = [
            naf.policy_index_point(game.turn, i) for i in indices
        ]
        resp["Y"] = [int(n) for n in self.root.N[indices].tolist()]
        # NOTE: this overwrites any caller-supplied P with the root priors.
        resp["P"] = [
            int(round(p * 1000)) for p in self.root.P[indices].tolist()
        ]
        # resp["Q"] = self.root.Q[indices].tolist()
    else:
        resp["moves"] = moves
    return resp
def top_moves_str(self, game):
    """Return a ':'-prefixed, comma-joined string of the (up to) three
    legal root moves with the highest prior P, in ascending prior order."""
    root = self.root
    legal = root.LMnz
    order = numpy.argsort(root.P[legal])
    best = []
    for rank in order[-3:]:
        best.append(str(naf.policy_index_point(game, legal[0][rank])))
    return ":" + ",".join(best)
def eval_game(self, game, maxbest=twixt.MAXBEST):
    """Run a single evaluation of `game` and return ``(score, moves, P)``:
    the root's static score, the `maxbest` highest-prior moves (best first),
    and their priors scaled to integer per-mille values."""
    self.compute_root(game)
    # assert self.root == None
    self.root = self.expand_leaf(game)
    root = self.root
    top_ixs = numpy.argsort(root.P)[-maxbest:]
    moves = []
    P = []
    # Walk the top indices from highest prior to lowest.
    for ix in reversed(top_ixs):
        moves.append(naf.policy_index_point(game, ix))
        P.append(int(round(root.P[ix] * 1000)))
    self.logger.debug("moves: %s, idx: %s", moves, top_ixs)
    return root.score, moves, P
def traverse(self, game, path, level, node):
    """Follow the most-visited child chain downward from `node`, replaying
    each move on `game` and `self.board` and appending the resulting board
    move to `path`; restores `game`/`self.board` state before returning."""
    best = numpy.argmax(node.N)
    visits = node.N[best]
    if not visits > 0:
        # Never-visited node: nothing to descend into.
        return
    child = node.subnodes[best]
    move = naf.policy_index_point(game.turn % 2, best)
    game.play(move)
    self.board.create_move_objects(len(game.history) - 1, visits)
    path.append(self.board.history[-1])
    if child is not None:
        self.traverse(game, path, level + 1, child)
    # Undo our move on both the game and the board on the way back up.
    game.undo()
    self.board.history.pop()
def visit_node(self, game, node, top=False, trials=None):
    """ Visit a node, return the evaluation from the point of view of the player currently to play. """
    # Parameters:
    #   game: live game object; mutated via play()/undo() around the recursion
    #         and restored before returning.
    #   node: search-tree node with per-move arrays N (visit counts),
    #         Q (mean values), P (priors), LM (legal-move mask) and
    #         LMnz (nonzero indices of LM).
    #   top:  True only at the search root; enables the smart_root pruning.
    #   trials: remaining trial budget (only read when top and smart_root).
    assert not game.just_won()
    if not node.LM.any():
        # Terminal: no legal moves remain at this node.
        # NOTE(review): the proof is recorded on `self` (the searcher), not on
        # `node`, yet the backup below tests `subnode.proven` — confirm this
        # is intended and not meant to be `node.proven` / `node.score`.
        self.proven = True
        if node.drawing_move:
            self.score = 0
        else:
            # all moves lose. very sad.
            self.score = -1
        return self.score
    if top and self.smart_root:
        # Root-only pruning: restrict selection to moves whose visit count
        # could still catch the current leader within `trials` more visits.
        vnz = node.N[node.LMnz]
        maxn = vnz.max()
        winnables = (node.N > maxn - trials) & node.LM
        num_winnables = numpy.count_nonzero(winnables)
        assert num_winnables > 0, (maxn, trials, numpy.array_str(node.N), numpy.array_str(node.LM))
        if num_winnables == 1:
            # Only one move can still win the visit race; pick it outright.
            index = winnables.argmax()
        else:
            maxes = (node.N == maxn)
            num_maxes = numpy.count_nonzero(maxes)
            assert num_maxes > 0
            if (num_maxes == 1 and maxn - vnz[numpy.argsort(vnz)[-2:]][0] > 1):
                # visit diff to second best is >1
                winnables[maxes.argmax()] = 0
            nsum = node.N.sum()  # don't need to filter since all are 0
            stv = math.sqrt(nsum + 1.0)
            # PUCT-style upper-confidence score, restricted to winnables.
            U = node.Q + self.cpuct * node.P * stv / (1.0 + node.N)
            wnz = numpy.nonzero(winnables)
            nz_index = U[wnz].argmax()
            index = wnz[0][nz_index]
    else:
        # At least one node worth visiting. Figure out which one to
        # visit...
        nsum = node.N.sum()  # don't need to filter since all are 0
        stv = math.sqrt(nsum + 1.0)
        # PUCT-style upper-confidence score; argmax over legal moves only.
        U = node.Q + self.cpuct * node.P * stv / (1.0 + node.N)
        nz_index = U[node.LMnz].argmax()
        index = node.LMnz[0][nz_index]
    move = naf.policy_index_point(game.turn, index)
    if top:
        self.logger.debug("selecting index=%d move=%s Q=%.3f P=%.5f N=%d", index, str(move), node.Q[index], node.P[index], node.N[index])
    subnode = node.subnodes[index]
    game.play(move)
    # cif self.visualize_mcts:
    #    self.board.create_move_objects(len(game.history)-1, True)
    if subnode:
        # Recurse; negate because the child's value is from the opponent's view.
        subscore = -self.visit_node(game, subnode)
    else:
        # First visit down this edge: expand a leaf and back up its static score.
        subnode = self.expand_leaf(game)
        node.subnodes[index] = subnode
        subscore = -subnode.score
    # if self.visualize_mcts:
    #    self.board.undo_last_move_objects()
    game.undo()
    node.N[index] += 1
    if subnode.proven:
        # Child value is exact: pin Q and drop the move from further search.
        node.Q[index] = subscore
        node.LM[index] = 0
        node.LMnz = node.LM.nonzero()
        if subscore == 1:
            node.proven = True
            node.winning_move = move
        elif subscore == 0:
            node.drawing_move = move
    else:
        # Incremental running-mean update of Q toward the sampled subscore.
        node.Q[index] += (subscore - node.Q[index]) / node.N[index]
    return subscore