def get_move(self, game_state: go.GameState):
    """Run every playout in sequence and pick the most-visited root action.

    Each playout is handed a fresh copy of ``game_state``; the object passed
    in by the caller is never given to ``_playout`` directly.

    Args:
        game_state: the current game state.

    Returns:
        The action whose root child has the largest ``_n_visits`` (the first
        such action in child order on ties), or ``PASS_MOVE`` when the root is
        still a leaf once the playouts have finished.
    """
    for _ in range(self._n_playout):
        self._playout(game_state.copy())

    root = self._root
    if root.is_leaf():
        # Nothing was expanded below the root, so there is no child to rank.
        return PASS_MOVE

    children = root._children
    return max(children, key=lambda action: children[action]._n_visits)
def get_move_probs(self, game_state: go.GameState):
    """Run every playout in sequence and turn root visit counts into probabilities.

    Each playout is handed a fresh copy of ``game_state``. Afterwards the
    visit counts of the root's children are converted to a distribution via
    ``softmax(log(visits + 1e-10) / self._temperature)``.

    Args:
        game_state: the current game state.

    Returns:
        ``(acts, act_probs)`` where ``acts`` is a tuple of the root's child
        actions and ``act_probs`` is the matching softmax output, or
        ``(None, None)`` when the root is still a leaf after the playouts.

    Note:
        The exploration level is governed by ``self._temperature`` (not by an
        argument); the original notes describe it as lying in (0, 1].
    """
    for _ in range(self._n_playout):
        self._playout(game_state.copy())

    if self._root.is_leaf():
        return None, None

    # Split the root's (action, child) pairs into parallel tuples.
    acts, visits = zip(
        *((action, child._n_visits) for action, child in self._root._children.items())
    )

    inverse_temperature = 1.0 / self._temperature
    # The tiny offset keeps log() finite for children that were never visited.
    log_visits = np.log(np.array(visits) + 1e-10)
    act_probs = softmax(inverse_temperature * log_visits)
    return acts, act_probs