Example 1
 def get_action(self, board, temp=1e-3):
     #sensible_moves = board.availables
     # the pi vector returned by MCTS, as in the AlphaGo Zero paper
     if self._is_selfplay:
         # keep the temperature high during self-play to encourage exploration
         temp = 1.5
     move_probs = np.zeros(15 * 15)  # one entry per point on the 15x15 board
     acts, probs = self.mcts.get_move_probs(board, temp)
     if acts is None:  # the AI resigns
         return None, None
     move_probs[list(acts)] = probs
     best_chance = np.max(move_probs)
     best_move = np.where(move_probs == best_chance)[0][0]
     if self._is_selfplay:
         move = np.random.choice(
             acts,
             p=probs
             #p=0.9*probs + 0.1*np.random.dirichlet(0.3*np.ones(len(probs)))
         )
         #debug
         print("choose ", RenjuBoard.number2pos(move), "by prob ",
               move_probs[move])
         print("best move is ", RenjuBoard.number2pos(best_move),
               best_chance)
     else:
         # with the default temp=1e-3, this is almost equivalent
         # to choosing the move with the highest probability
         #move = np.random.choice(acts, p=probs)
         move = best_move
     # update the root node in both branches and reuse the search tree
     self.mcts.update_with_move(board, move)
     return move, move_probs
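
The `temp` parameter follows the AlphaGo Zero scheme: the root visit counts N(a) gathered by MCTS are converted into move probabilities pi(a) proportional to N(a)^(1/temp). A minimal, self-contained sketch of that conversion, assuming a dict of per-move visit counts (the helper name `visits_to_probs` is illustrative, not from this code):

import numpy as np

def visits_to_probs(visit_counts, temp=1e-3):
    # pi(a) ~ N(a)^(1/temp), computed in log space so that very small
    # temperatures do not overflow
    acts = np.array(list(visit_counts.keys()))
    visits = np.array(list(visit_counts.values()), dtype=np.float64)
    logits = np.log(visits + 1e-10) / temp
    logits -= logits.max()          # shift for numerical stability
    probs = np.exp(logits)
    return acts, probs / probs.sum()

# sharp at the default temp=1e-3, much flatter at the self-play temp=1.5
acts, probs = visits_to_probs({112: 50, 113: 30, 96: 20}, temp=1.5)

With temp=1e-3 the distribution collapses onto the most-visited move, which is why the non-self-play branch above can simply take best_move; the self-play value temp=1.5 keeps the distribution flat enough for exploration.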
Example 2
 def _debug(self):
     # dump per-child statistics of the MCTS root: selection score
     # (Q + U), visit count, mean value Q, and prior probability P
     if self.debug_mode:
         for act, _sub_node in self._root._children.items():
             if _sub_node._n_visits > 0:
                 print(RenjuBoard.number2pos(act), "\tsel ",
                       _sub_node.get_value(self._c_puct), "\tv ",
                       _sub_node._n_visits, "\tQ ", _sub_node._Q, "\tp ",
                       _sub_node._P)
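
The `get_value(self._c_puct)` score printed here is, in AlphaGo Zero style MCTS, the PUCT selection rule Q(s, a) + U(s, a), where U(s, a) = c_puct * P(s, a) * sqrt(N(parent)) / (1 + N(s, a)). A standalone sketch of a node implementing that rule, with field names mirroring the snippet (the `TreeNode` class below is an illustrative reimplementation, not this repository's):

import math

class TreeNode:
    # minimal MCTS node carrying the statistics the debug dump prints
    def __init__(self, parent, prior_p):
        self._parent = parent
        self._children = {}    # move -> TreeNode
        self._n_visits = 0
        self._Q = 0.0          # running mean of evaluations through this node
        self._P = prior_p      # prior probability from the policy network

    def get_value(self, c_puct):
        # PUCT: Q + c_puct * P * sqrt(parent visits) / (1 + own visits)
        u = (c_puct * self._P *
             math.sqrt(self._parent._n_visits) / (1 + self._n_visits))
        return self._Q + u

c_puct trades off exploitation (the Q term) against exploration guided by the policy prior P: as a child's visit count grows, its U term shrinks, steering selection toward under-explored moves.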