def prior_knowledge_cmd(self, args):
    """
    Respond with the prior-knowledge statistics of the candidate moves.

    The moves and their feature-based probabilities are taken from
    GoBoardUtilGo4 for ``self.board`` and turned into prior-knowledge
    entries by ``GoBoardUtilGo4.prior_knowledge_initialization``.
    Each entry is indexed as ``[move, value1, value2, sort_key]``
    (presumably move, wins, simulations, win rate -- confirm against
    ``prior_knowledge_initialization``).

    The response is the concatenation of ``"<move> <int> <int> "`` for
    every entry (note the trailing space), where the two integers are
    entry[1] and entry[2] rounded to the nearest integer.

    Arguments:
    args -- unused.
    """
    moves, probs = GoBoardUtilGo4.generate_moves_with_feature_based_probs(
        self.board)
    prior_knowledge = GoBoardUtilGo4.prior_knowledge_initialization(
        moves, probs)

    # Rewrite each entry's move in place: 'PASS' is reported as 'Pass',
    # every other move is converted from a point index to a board label.
    # NOTE(review): this uses GoBoardUtil, not GoBoardUtilGo4 -- confirm
    # that both are imported and that this is intentional.
    for entry in prior_knowledge:
        if entry[0] == 'PASS':
            entry[0] = 'Pass'
        else:
            entry[0] = GoBoardUtil.format_point(
                self.board._point_to_coord(entry[0]))

    # Two stable passes: alphabetical by label first, then descending by
    # entry[3], so ties on entry[3] stay in alphabetical order.
    ranked = sorted(prior_knowledge, key=lambda entry: entry[0])
    ranked.sort(key=lambda entry: entry[3], reverse=True)

    response = ''.join(
        '{} {} {} '.format(
            entry[0], int(round(entry[1])), int(round(entry[2])))
        for entry in ranked)
    self.respond(response)
def simulate(self, board, cboard, move, toplay):
    """
    Run one simulation that starts with `move` played by `toplay`.

    `board` is copied into `cboard` with GoBoardUtilGo4.copyb2b, the move
    is played on the copy, and the rest of the game is played out for the
    opponent with GoBoardUtilGo4.playGame.

    Arguments:
    board -- the position to simulate from; only read through copyb2b.
    cboard -- scratch board that is overwritten and played on.
    move -- the first move of the simulation.
    toplay -- the color that plays `move`.

    Returns:
    Whatever GoBoardUtilGo4.playGame returns (presumably the winning
    color -- confirm against playGame).
    """
    GoBoardUtilGo4.copyb2b(board, cboard)
    # Compare the boards element-wise. The previous check,
    # `cboard.board.all() == board.board.all()`, only compared two
    # "are all entries truthy" flags and so could not detect a bad copy.
    assert (cboard.board == board.board).all()
    cboard.move(move, toplay)
    opp = GoBoardUtilGo4.opponent(toplay)
    # NOTE(review): `simulations` is a module-level name here, not
    # `self.simulation_policy` -- confirm that this is intended.
    return GoBoardUtilGo4.playGame(
        cboard,
        opp,
        komi=self.komi,
        limit=self.limit,
        simulation_policy=simulations,
        use_pattern=self.use_pattern,
        check_selfatari=self.check_selfatari)
def prior_knowledge_stat(self, board, root, color):
    """
    Collect per-move statistics of the children of `root`.

    Arguments:
    board -- used only to convert moves with board.point_to_string.
    root -- tree node whose `_children` mapping (move -> node) is read.
    color -- the color whose wins are counted. For BLACK the child's
             `_black_wins` is used, otherwise visits minus black wins.

    Returns:
    A list of ``[point_string, wins, visits]`` lists ordered by win rate
    (wins / visits rounded to 2 decimals, 0 for unvisited children),
    highest first. `visits` is rounded to an int.
    NOTE(review): `wins` is returned unrounded while `visits` is rounded;
    confirm that callers expect this.
    """
    stats = []
    for move, node in root._children.items():
        visits = node._n_visits
        if color == BLACK:
            wins = node._black_wins
        else:
            wins = visits - node._black_wins
        win_rate = round(float(wins) / visits, 2) if visits else 0
        # point_to_string is given None for a pass.
        label = board.point_to_string(None if move == PASS else move)
        stats.append((label, wins, visits, win_rate))

    # Stable sort, so children with equal win rate keep their tree order.
    stats.sort(key=lambda entry: entry[3], reverse=True)
    master = [[label, wins, int(round(visits))]
              for label, wins, visits, _ in stats]
    sys.stderr.flush()
    return master
def print_stat(self, board, root, color):
    """
    Write a summary of `root` and its children to stderr.

    Three messages are written: the number of children, the visit count of
    `root`, and a list of ``(point_string, win_rate, wins, visits)`` tuples
    for the children, ordered by visits (highest first).

    Arguments:
    board -- used only to convert moves with board.point_to_string.
    root -- tree node whose `_children` and `_n_visits` are read.
    color -- the color whose wins are counted. For BLACK the child's
             `_black_wins` is used, otherwise visits minus black wins.
    """
    sys.stderr.write("Number of children {} \n".format(len(root._children)))
    sys.stderr.flush()
    sys.stderr.write("Number of roots visits: {} \n".format(root._n_visits))
    sys.stderr.flush()

    stats = []
    for move, node in root._children.items():
        visits = node._n_visits
        if color == BLACK:
            wins = node._black_wins
        else:
            wins = visits - node._black_wins
        # Unvisited children get a win rate of 0 instead of dividing by 0.
        win_rate = round(float(wins) / visits, 2) if visits else 0
        # point_to_string is given None for a pass.
        label = board.point_to_string(None if move == PASS else move)
        stats.append((label, win_rate, wins, visits))

    # Index 3 of each tuple is the visit count.
    by_visits = sorted(stats, key=lambda entry: entry[3], reverse=True)
    sys.stderr.write("Statistics: {} \n".format(by_visits))
    sys.stderr.flush()
def prior_knowledge_cmd(self, args):
    """
    Respond with ``<move> <wins> <simulations>`` for every candidate move.

    The candidate points come from ``self.probability``; ``self.sim`` and
    ``self.winrates`` supply, per point, a simulation count and a win rate
    (presumably -- confirm against those helpers). Moves are ordered by
    win rate, highest first; ties are ordered by the first character of
    the formatted move, and otherwise keep their original order.

    For each move, wins is ``round(simulations * win_rate)`` computed from
    the unrounded simulation count, and the simulation count itself is
    then rounded. Point 0 is reported as 'Pass'. All fields are joined
    with single spaces and passed to ``self.respond``.

    Unlike the earlier hand-written sort, this does not reorder `move` or
    round `sim_probs` in place.

    Arguments:
    args -- unused.
    """
    from functools import cmp_to_key

    move, probs = self.probability(self.board)
    sim_probs = self.sim(probs, move, self.board)
    win_rate = self.winrates(probs, move, self.board)

    def label_of(point):
        return GoBoardUtilGo4.format_point(
            self.board._point_to_coord(point))

    def by_rank(first, second):
        # Same ordering the hand-rolled bubble sort implemented: higher
        # win rate first, then first character of the label.
        # NOTE(review): only the first character is compared, so e.g.
        # two moves in the same column are left in input order --
        # confirm whether a full alphabetical tie-break was intended.
        if first == second:
            return 0
        if win_rate[first] < win_rate[second]:
            return 1
        if win_rate[first] == win_rate[second]:
            a, b = label_of(first)[0], label_of(second)[0]
            return (a > b) - (a < b)
        if win_rate[first] > win_rate[second]:
            return -1
        return 0

    # sorted() is stable, so it yields the same order as the bubble sort.
    ordered = sorted(move, key=cmp_to_key(by_rank))

    fields = []
    for point in ordered:
        simulations = sim_probs[point]
        # Wins must be derived before the simulation count is rounded.
        wins = int(round(simulations * win_rate[point]))
        label = 'Pass' if point == 0 else label_of(point)
        fields.extend((label, wins, int(round(simulations))))

    self.respond(' '.join(str(field) for field in fields))
def good_print(self, board, node, color, num_nodes):
    """
    Trace the tree's selection policy from `node` down to a leaf on stderr.

    At every level the node is described, up to `num_nodes` of its
    children with the highest UCT value are listed, and the child chosen
    by ``node.select`` is played on a copy of `board`. When a leaf is
    reached, one rollout is run from it with ``self._evaluate_rollout``
    and its result is printed.

    Arguments:
    board -- the position at `node`; it is copied, not modified.
    node -- the tree node to start from.
    color -- the color to play at `node`.
    num_nodes -- maximum number of children to list per level.
    """
    cboard = board.copy()
    sys.stderr.write("\nTaking a tour of selection policy in tree! \n\n")
    sys.stderr.write(cboard.get_twoD_board())
    sys.stderr.flush()

    while not node.is_leaf():
        # Describe the current node. This must read node._move: the local
        # `move` is not bound yet on the first iteration, which used to
        # raise when the starting node was not the root.
        if node._move is None:
            pointString = 'Root'
        elif node._move != PASS:
            pointString = board.point_to_string(node._move)
        else:
            pointString = node._move
        sys.stderr.write(
            "\nMove: {} Numebr of children {}, Number of visits: {}\n"
            .format(pointString, len(node._children), node._n_visits))
        sys.stderr.flush()

        max_flag = color == BLACK
        moves_ls = [
            (child_move,
             uct_val(node, child, self.exploration, max_flag),
             child)
            for child_move, child in node._children.items()
        ]
        moves_ls.sort(key=lambda i: i[1], reverse=True)

        if moves_ls:
            sys.stderr.write(
                "\nPrinting {} of {} childs that have highest UCT value \n\n"
                .format(num_nodes, pointString))
            sys.stderr.flush()
            # Slice instead of indexing range(num_nodes) so that a node
            # with fewer than num_nodes children does not raise.
            for move, child_val, child_node in moves_ls[:num_nodes]:
                if move != PASS:
                    sys.stderr.write(
                        "\nChild point:{} ;UCT Value {}; Number of visits: {}; Number of Black wins: {}\n"
                        .format(cboard.point_to_string(move), child_val,
                                child_node._n_visits,
                                child_node._black_wins))
                    sys.stderr.flush()
                else:
                    sys.stderr.write(
                        "\nChild point:{} ;UCT Value {}; Number of visits: {}; Number of Black wins: {} \n"
                        .format(move, child_val, child_node._n_visits,
                                child_node._black_wins))
                    sys.stderr.flush()

        # Greedily select next move.
        move, next_node = node.select(self.exploration, max_flag)
        if move == PASS:
            # The board is given None for a pass.
            move = None
        assert cboard.check_legal(move, color)
        pointString = cboard.point_to_string(move)
        cboard.move(move, color)
        sys.stderr.write(
            "\nBoard in simulation after chosing child {} in tree. \n"
            .format(pointString))
        sys.stderr.write(cboard.get_twoD_board())
        sys.stderr.flush()
        color = GoBoardUtilGo4.opponent(color)
        node = next_node

    assert node.is_leaf()
    cboard.current_player = color
    leaf_value = self._evaluate_rollout(cboard, color)
    sys.stderr.write(
        "\nWinner of simulation is: {} color, Black is 0 an \n"
        .format(leaf_value))
    sys.stderr.flush()
def update_with_move(self, last_move):
    """
    Advance the tree root by one move and switch the side to play.

    If `last_move` is a child of the current root, that child becomes the
    new root and its subtree is kept; otherwise the tree is restarted
    from a fresh ``TreeNode(None)``. Either way the new root is detached
    from its parent, so the rest of the old tree is no longer referenced
    from here.

    Arguments:
    last_move -- the move that was just played; used as a key into the
                 current root's `_children`.
    """
    children = self._root._children
    new_root = children[last_move] if last_move in children else TreeNode(None)
    self._root = new_root
    new_root._parent = None
    self.toplay = GoBoardUtilGo4.opponent(self.toplay)
def writeMoves(board, moves, count, numSimulations):
    """
    Write the win rate of every move to stderr, best first.

    Each move is paired with ``count[i] / numSimulations``. A move of
    None is labelled 'Pass'; any other move is converted to a label via
    board._point_to_coord and GoBoardUtilGo4.format_point. The pairs are
    ordered with the `byPercentage` key in descending order and written
    as a single "win rates: [...]" line.

    Arguments:
    board -- used only for board._point_to_coord.
    moves -- sequence of moves (None for a pass).
    count -- sequence parallel to `moves`; count[i] belongs to moves[i].
    numSimulations -- the divisor applied to every count.
    """
    gtp_moves = []
    for index, point in enumerate(moves):
        if point is None:
            label = 'Pass'
        else:
            x, y = board._point_to_coord(point)
            label = GoBoardUtilGo4.format_point((x, y))
        rate = float(count[index]) / float(numSimulations)
        gtp_moves.append((label, rate))

    ranked = sorted(gtp_moves, key=byPercentage, reverse=True)
    sys.stderr.write("win rates: {}\n".format(ranked))
    sys.stderr.flush()
def genmove_cmd(self, args):
    """
    Ask the engine for a move, play it on the board and respond with it.

    Arguments
    ---------
    args[0] : str
        The color to generate a move for; it is lower-cased and converted
        with GoBoardUtilGo4.color_to_int.

    Responses
    ---------
    "pass" when the engine returns None, otherwise the formatted point of
    the move that was played. Any exception is reported as
    "Error: <message>" and then re-raised.

    NOTE(review): an illegal engine move produces two responses
    ("Illegal move: ..." followed by "Error: ..."), because the
    RuntimeError raised for it is caught by the handler below.
    NOTE(review): a pass is reported but not applied to self.board --
    confirm that this is intended.
    """
    try:
        color = GoBoardUtilGo4.color_to_int(args[0].lower())
        self.debug_msg("Board:\n{}\nko: {}\n".format(
            str(self.board.get_twoD_board()), self.board.ko_constraint))

        move = self.go_engine.get_move(self.board, color)
        if move is None:
            self.respond("pass")
            return

        if self.board.check_legal(move, color):
            self.board.move(move, color)
            self.debug_msg("Move: {}\nBoard: \n{}\n".format(
                move, str(self.board.get_twoD_board())))
            coord = self.board._point_to_coord(move)
            self.respond(GoBoardUtilGo4.format_point(coord))
        else:
            coord = self.board._point_to_coord(move)
            self.respond("Illegal move: {}".format(
                GoBoardUtilGo4.format_point(coord)))
            raise RuntimeError("Illegal move given by engine")
    except Exception as e:
        self.respond('Error: {}'.format(str(e)))
        raise
def _playout(self, board, color):
    """
    Run one playout: descend from the root to a leaf, expand the leaf,
    evaluate it with a rollout and propagate the result back up.

    `board` is played on directly, so the caller must pass a copy.

    Arguments:
    board -- a copy of the board at the root position.
    color -- color to play at the root.

    Returns:
    None
    """
    current = self._root
    current._use_knowledge = self.in_tree_knowledge

    # Only an unexpanded root takes this branch.
    if not current._expanded:
        current.expand(board, color)
        # Avoid division by zero errors: with "probabilistic" knowledge
        # the root's counts are set to the totals over its children.
        if self.in_tree_knowledge == "probabilistic":
            children = current._children.values()
            current._black_wins = sum(
                child._black_wins for child in children)
            current._n_visits = sum(
                child._n_visits for child in children)

    # Selection: follow the tree policy down to a leaf.
    while not current.is_leaf():
        move, child = current.select(self.exploration, color == BLACK)
        if move == PASS:
            # The board is given None for a pass.
            move = None
        else:
            assert board.check_legal(move, color)
        board.move(move, color)
        color = GoBoardUtilGo4.opponent(color)
        current = child
    assert current.is_leaf()

    # Expansion, then evaluation by rollout.
    if not current._expanded:
        current.expand(board, color)
    assert board.current_player == color
    leaf_value = self._evaluate_rollout(board, color)

    # Update value and visit count of nodes in this traversal.
    current.update_recursive(leaf_value)
def _evaluate_rollout(self, board, toplay):
    """
    Play the game out from `board` with GoBoardUtilGo4.playGame and score
    the result from Black's point of view.

    Arguments:
    board -- the position to play out; handed to playGame as is.
    toplay -- the color to move first in the playout.

    Returns:
    1 if playGame reports BLACK as the winner, otherwise 0.
    """
    settings = {
        'komi': self.komi,
        'limit': self.limit,
        'simulation_policy': self.simulation_policy,
        'use_pattern': self.use_pattern,
        'check_selfatari': self.check_selfatari,
    }
    winner = GoBoardUtilGo4.playGame(board, toplay, **settings)
    return 1 if winner == BLACK else 0
def prior_knowledge_cmd(self, args):
    """
    Respond with "Statistics: " followed by the values from self.get_move.

    A fresh MCTS instance is stored in self.MCTS, and self.get_move is
    called for self.board with that instance's `toplay`. The result is
    treated as an iterable of iterables; every inner item is converted
    with str() and followed by a single space.

    Arguments:
    args -- unused.
    """
    # NOTE(review): the policy moves generated here are never used below;
    # the call is kept only to leave behavior untouched.
    _unused_policy_moves, _ = GoBoardUtilGo4.generate_all_policy_moves(
        self.board,
        self.go_engine.use_pattern,
        self.go_engine.check_selfatari)

    self.MCTS = MCTS()
    move_set = self.get_move(self.board, self.MCTS.toplay)

    move_string = "".join(
        str(item) + " " for group in move_set for item in group)
    self.respond("Statistics: " + move_string)
def expandWithKnowledge(self, board, color):
    """
    Expand this node, seeding each new child with prior-knowledge counts.

    For every move returned by
    GoBoardUtilGo4.generate_moves_with_feature_based_probs a prior is
    derived from its probability p relative to the largest probability:

        simulations = 10 * (p / max_prob)
        wins        = ((p / max_prob) / 2 + 0.5) * simulations

    A child is created for every empty point that is not already a child,
    is legal for `color` and is not an eye for `color`; if the point has
    a prior, it is stored in the child's `_black_wins` / `_n_visits`.
    A PASS child is then (re)created from the last prior in the list, and
    the node is marked as expanded.

    NOTE(review): the prior wins are written to `_black_wins` regardless
    of `color` -- confirm that this is right when `color` is not BLACK.
    NOTE(review): the PASS child assumes the last generated move is the
    pass move, whose probability is read from probs[-1] -- confirm.

    Arguments:
    board -- the position at this node.
    color -- the color to play at this node.
    """
    moves, probs = GoBoardUtilGo4.generate_moves_with_feature_based_probs(
        board)
    maxProb = max(probs)
    passProb = probs[-1]

    # (move, wins, simulations) for every generated move, in order.
    priors = []
    for move in moves:
        prob = passProb if move == 'PASS' else probs[move]
        ratio = prob / maxProb
        simulations = 10 * ratio
        wins = ((ratio / 2) + 0.5) * simulations
        priors.append((move, wins, simulations))

    # Index the priors by move so that each child is a single lookup
    # instead of a scan over the whole list.
    prior_by_move = {move: (wins, sims) for move, wins, sims in priors}

    for move in board.get_empty_points():
        if (move not in self._children
                and board.check_legal(move, color)
                and not board.is_eye(move, color)):
            child = TreeNode(self)
            child._move = move
            prior = prior_by_move.get(move)
            if prior is not None:
                child._black_wins, child._n_visits = prior
            self._children[move] = child

    _, pass_wins, pass_sims = priors[-1]
    pass_child = TreeNode(self)
    pass_child._move = PASS
    pass_child._black_wins = pass_wins
    pass_child._n_visits = pass_sims
    self._children[PASS] = pass_child
    self._expanded = True
def _playout(self, board, color):
    """
    Perform a single playout through the tree.

    Starting at the root, children are selected until a leaf is reached;
    the leaf is expanded if needed, evaluated with one rollout, and the
    rollout value is propagated back with update_recursive.

    The board is modified in place, so a copy must be provided.

    Arguments:
    board -- a copy of the board.
    color -- color to play

    Returns:
    None
    """
    cursor = self._root
    # Only an unexpanded root takes this branch.
    if not cursor._expanded:
        cursor.expand(board, color)

    while True:
        if cursor.is_leaf():
            break
        # Greedily select next move.
        maximize = color == BLACK
        move, cursor_next = cursor.select(self.exploration, maximize)
        if move != PASS:
            assert board.check_legal(move, color)
        else:
            # The board is given None for a pass.
            move = None
        board.move(move, color)
        color = GoBoardUtilGo4.opponent(color)
        cursor = cursor_next

    assert cursor.is_leaf()
    if not cursor._expanded:
        cursor.expand(board, color)
    assert board.current_player == color

    # Update value and visit count of nodes in this traversal.
    cursor.update_recursive(self._evaluate_rollout(board, color))