def get_action(self, board, temp=1e-3):
    #sensible_moves = board.availables
    # the pi vector returned by MCTS as in the AlphaGo Zero paper
    if self._is_selfplay:
        temp = 1.5
    move_probs = np.zeros(15 * 15)  # 15x15 Renju board
    acts, probs = self.mcts.get_move_probs(board, temp)
    if acts is None:  # the AI resigns
        return None, None
    move_probs[list(acts)] = probs
    best_chance = np.max(move_probs)
    best_move = np.where(move_probs == best_chance)[0][0]
    if self._is_selfplay:
        move = np.random.choice(
            acts, p=probs
            #p=0.9*probs + 0.1*np.random.dirichlet(0.3*np.ones(len(probs)))
        )
        # debug
        print("choose ", RenjuBoard.number2pos(move), "by prob ", move_probs[move])
        print("best move is ", RenjuBoard.number2pos(best_move), best_chance)
    else:
        # with the default temp=1e-3, this is almost equivalent
        # to choosing the move with the highest prob
        #move = np.random.choice(acts, p=probs)
        move = best_move
    # advance the root node to the chosen move and reuse the search tree
    #self.mcts.update_with_move(-1)  # old behaviour: reset the root node
    self.mcts.update_with_move(board, move)
    return move, move_probs
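# The temperature controls how MCTS visit counts are flattened into the pi
# vector: pi(a) is proportional to N(a)^(1/temp), so temp=1.5 during self-play
# spreads probability mass for exploration, while the evaluation default
# temp=1e-3 is nearly an argmax over visit counts. Below is a minimal sketch of
# that mapping, assuming get_move_probs applies it internally; visits_to_probs
# and the example counts are illustrative, not part of this repo.
import numpy as np

def visits_to_probs(visit_counts, temp):
    # work in log space so tiny temperatures do not overflow
    logits = np.log(np.asarray(visit_counts, dtype=np.float64) + 1e-10) / temp
    logits -= np.max(logits)
    probs = np.exp(logits)
    return probs / probs.sum()

#print(visits_to_probs([100, 40, 10], temp=1.5))   # spread out: ~[0.57, 0.31, 0.12]
#print(visits_to_probs([100, 40, 10], temp=1e-3))  # almost one-hot: ~[1.0, 0.0, 0.0]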
def _debug(self):
    # print the search statistics of every visited child of the root:
    # PUCT selection value, visit count, mean action value Q, and prior P
    if self.debug_mode:
        for act, _sub_node in self._root._children.items():
            if _sub_node._n_visits > 0:
                print(RenjuBoard.number2pos(act),
                      "\tsel ", _sub_node.get_value(self._c_puct),
                      "\tv ", _sub_node._n_visits,
                      "\tQ ", _sub_node._Q,
                      "\tp ", _sub_node._P)
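# Hypothetical sketch (not this repo's TreeNode implementation): the PUCT
# score that get_value(self._c_puct) is assumed to return for each child,
# following the AlphaGo Zero selection rule Q(s,a) + U(s,a).
import math

def puct_value(Q, P, n_visits, parent_visits, c_puct):
    # exploration bonus U: grows with the prior P and the parent's visit
    # count, shrinks as this child is revisited, so search gradually trusts Q
    u = c_puct * P * math.sqrt(parent_visits) / (1 + n_visits)
    return Q + u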