import os

import gym
import numpy as np
import torch

# Board, Game, MCTS_Pure and bit_to_board are provided by the project's own modules.


class RL_QG_agent(object):
    def __init__(self):
        self.temp = 1e-3        # the temperature parameter
        self.n_playout = 200    # number of simulations for each move
        self.c_puct = 5
        self.board_width = 8
        self.board_height = 8
        self.model_path = os.path.join("./models/curr_model_100rollout.pt")
        # self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, net_params=None)
        # self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
        #                               c_puct=self.c_puct, n_playout=self.n_playout)
        self.mcts_player = MCTS_Pure(c_puct=5, n_playout=self.n_playout)
        self.env = gym.make("Reversi8x8-v0")
        self.init_model()
        # self.load_model()

    def init_model(self):
        self.board = Board(env=self.env, width=self.board_width, height=self.board_height)
        self.board.init_board()
        self.game = Game(self.board)
        self.have_step = False

    def place(self, state, enables, player=None):
        # Rebuild the internal position from the bitboards; curr_state marks empty squares.
        curr_state = bit_to_board(self.board.black, self.board.white)
        curr_state = 1 - (curr_state[0] + curr_state[1])
        # Squares occupied internally but empty in the observed state indicate a new game.
        reverse_change = np.where((curr_state - state[2]) == -1)
        if not self.have_step:
            pass
        elif reverse_change[0].shape[0] > 1:
            self.board.init_board()
            self.have_step = False
            curr_state = bit_to_board(self.board.black, self.board.white)
            curr_state = 1 - (curr_state[0] + curr_state[1])
        # Exactly one newly occupied square means the opponent just played there.
        change = np.where((curr_state - state[2]) == 1)
        if change[0].shape[0] == 1:
            action = change[0][0] * self.board_width + change[1][0]
            self.board.do_move(action)
        else:
            if not self.have_step:
                pass
            else:
                # No new stone appeared: the opponent passed (65 encodes a pass).
                action = 65
                self.board.do_move(action)
        # Let the MCTS player choose our reply and apply it to the internal board.
        move = self.mcts_player.get_action(self.board)
        self.board.do_move(move)
        self.have_step = True
        return move

    def load_model(self):
        self.policy_value_net.policy_value_net.load_state_dict(
            torch.load(self.model_path))
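# A sketch of re-enabling the trained network player that the commented-out
# lines above point to. PolicyValueNet and MCTSPlayer are assumed to come from
# the project's own modules, and the constructor arguments are taken from those
# comments rather than from a verified API.
agent = RL_QG_agent()
agent.policy_value_net = PolicyValueNet(agent.board_width, agent.board_height,
                                        net_params=None)
agent.load_model()   # restores ./models/curr_model_100rollout.pt into the net
agent.mcts_player = MCTSPlayer(agent.policy_value_net.policy_value_fn,
                               c_puct=agent.c_puct, n_playout=agent.n_playout)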
def run(states, sensible_moves, currentPlayer, lastMove):
    n = 5
    width, height = 8, 8
    # Rebuild the board from the caller-supplied game state.
    board = Board(width=width, height=height, n_in_row=n)
    board.init_board()
    board.states = states
    board.availables = sensible_moves
    board.current_player = currentPlayer
    board.last_move = lastMove
    # best_policy = PolicyValueNetNumpy(width, height, policy_param)
    # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)
    # Use pure MCTS only; n_playout is the number of simulations per move.
    mcts_player = MCTS_Pure(c_puct=5, n_playout=4000)
    nextmove = mcts_player.get_action(board)
    return nextmove
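# A hedged usage sketch for run() above: a game host hands over its current
# position and gets back the pure-MCTS move. The attribute names mirror what
# run() copies onto the board; whether a host board exposes them this way is
# an assumption.
board = Board(width=8, height=8, n_in_row=5)
board.init_board()
next_move = run(board.states, board.availables, board.current_player, board.last_move)
print("pure MCTS suggests move", next_move)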
class AI(QtCore.QThread):
    # Emitted with the four components of the chosen move once the search finishes.
    finishSignal = QtCore.pyqtSignal(int, int, int, int)

    # The constructor takes the board as an extra argument.
    def __init__(self, board, parent=None):
        super(AI, self).__init__(parent)
        self.board = board

    # Override run(): search for a move in the worker thread, apply it to the
    # board, and signal the result back to the GUI.
    def run(self):
        self.ai = MCTSPlayer(c_puct=5, n_playout=20)
        # self.ai.board = self.board
        move = self.ai.get_action(self.board)
        self.board.do_move(move)
        print(move)
        self.finishSignal.emit(int(move[0]), int(move[1]), int(move[2]), int(move[3]))
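# A minimal sketch of how the GUI side might use this worker thread: connect
# the finish signal to a slot and start the search off the UI thread. The slot
# name on_ai_move is hypothetical, and reading the four ints as source and
# destination coordinates is an assumption; connect() and start() are standard
# Qt calls.
def on_ai_move(r1, c1, r2, c2):
    print("AI moved: (%d, %d) -> (%d, %d)" % (r1, c1, r2, c2))

ai_thread = AI(board)
ai_thread.finishSignal.connect(on_ai_move)
ai_thread.start()   # QThread.start() runs AI.run() in a worker thread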
        # (inside evaluate_player's per-game loop)
        opponent.reset_player()
        board = BoardSlim(width=BOARD_SHAPE[0], height=BOARD_SHAPE[1], n_in_row=4)
        game = Game(board)
        winner = game.start_play(player, opponent, start_player=i % 2,
                                 is_shown=(i == 0) and show_game)
        print("winner: ", winner)
        if winner == 1:
            stats["wins"] += 1
        elif winner == -1:
            stats["ties"] += 1
        stats["length"].append(BOARD_SHAPE[0] * BOARD_SHAPE[1] - len(board.availables))
    return stats


if __name__ == "__main__":
    player = load_player()
    for playout in playouts:
        mcts_player = MCTSPlayer(n_playout=playout)
        stats = evaluate_player(player, mcts_player, show_game=True)
        print()
        print("win ratio against %d playouts: " % playout, stats["wins"] / num_games)
        print("tie ratio against %d playouts: " % playout, stats["ties"] / num_games)
        print("average game length against %d playouts: " % playout, np.mean(stats["length"]))
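# A hypothetical skeleton of the evaluate_player function the excerpt above
# comes from; the signature is inferred from how it is called in __main__, and
# the stats bookkeeping from the loop body shown above. num_games is assumed to
# be a module-level constant.
def evaluate_player(player, opponent, show_game=False):
    stats = {"wins": 0, "ties": 0, "length": []}
    for i in range(num_games):
        # per-game body as shown above: reset the opponent, build a fresh
        # BoardSlim, play one game, and record winner and game length
        ...
    return stats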
try:
    opts, args = getopt.getopt(sys.argv[1:], "c:m:")
except getopt.GetoptError:
    print('TwelveShogi.py -c <startColor(0 or 1)> -m <mode(0 or 1 or 2)>')
    sys.exit()
for opt, arg in opts:
    if opt == '-c':
        startColor = int(arg)
    elif opt == '-m':
        mode = int(arg)

shogiboard = ShogiBoard()
shogiboard.curStepColor = startColor
shogiboard.redrawBoard(window)

# Modes 1 and 2 both use the pure-MCTS player; mode 2 additionally tracks training counts.
if mode == 1:
    # AIPlayer = TSAI()
    AIPlayer = MCTS_Pure()
    # AIPlayer = MCTSPlayer()
elif mode == 2:
    # AIPlayer = TSAI()
    AIPlayer = MCTS_Pure()
    trainCounts = 0
    # AIPlayer = MCTSPlayer(is_selfplay = 1)

# Layout constants for drawing the board.
top = 80
left = 70
yGap = 125
xGap = 140
curRow = 3
curCol = 0