import os
from os import path

# Board, Game, MCTS_pure, PolicyValueNet, MCTSPlayer (and Human for the
# terminal mode) are project-local classes, assumed to be imported elsewhere
# in this file.


def run(start_player=0, is_shown=1):
    # run a gomoku game with AI
    # you can set human vs AI or AI vs AI
    n = 5
    width, height = 15, 15
    # model_file = 'model_15_15_5/best_policy.model'
    # width, height = 6, 6
    # model_file = 'model/best_policy.model'
    # width, height = 11, 11
    # model_file = 'model/best_policy.model'
    model_file = 'training/model_best/policy.model'
    # model_file = 'training/best_policy.model'
    p = os.getcwd()
    model_file = path.join(p, model_file)

    board = Board(width=width, height=height, n_in_row=n)
    game = Game(board)

    # pure-MCTS baseline player (constructed but unused in this function)
    mcts_player = MCTS_pure(5, 8000)

    best_policy = PolicyValueNet(board_width=width,
                                 board_height=height,
                                 block=19,
                                 init_model=model_file,
                                 cuda=True)

    # alpha_zero vs alpha_zero
    # best_policy.save_numpy(best_policy.network_all_params)
    # best_policy.load_numpy(best_policy.network_oppo_all_params)

    alpha_zero_player = MCTSPlayer(best_policy, model_file,
                                   policy_value_function=best_policy.policy_value_fn_random,
                                   action_fc=best_policy.action_fc_test,
                                   evaluation_fc=best_policy.evaluation_fc2_test,
                                   c_puct=5,
                                   n_playout=400,
                                   is_selfplay=False)

    # alpha_zero_player_oppo = MCTSPlayer(policy_value_function=best_policy.policy_value_fn_random,
    #                                     action_fc=best_policy.action_fc_test_oppo,
    #                                     evaluation_fc=best_policy.evaluation_fc2_test_oppo,
    #                                     c_puct=5,
    #                                     n_playout=400,
    #                                     is_selfplay=False)

    # human player, input your move in the format: 2,3
    # set start_player=0 for human first

    # play in terminal without GUI
    # human = Human()
    # win = game.start_play(human, alpha_zero_player, start_player=start_player, is_shown=is_shown, print_prob=True)
    # return win

    # play in GUI
    game.start_play_with_UI(alpha_zero_player)  # play with AlphaZero
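
# A minimal sketch of the Human player referenced in the commented-out
# terminal mode above; it is NOT part of the original file. It assumes the
# Board class exposes location_to_move() and an `availables` move list, as in
# common AlphaZero-Gomoku implementations; this project's actual Human class
# may differ.
class HumanSketch:
    """Hypothetical human player: reads a move such as '2,3' from stdin."""

    def set_player_ind(self, p):
        self.player = p

    def get_action(self, board):
        try:
            # parse "row,col" input into a board move index
            location = [int(s) for s in input('Your move: ').split(',')]
            move = board.location_to_move(location)
        except (ValueError, IndexError):
            move = -1
        if move == -1 or move not in board.availables:
            print('invalid move')
            move = self.get_action(board)  # re-prompt until the move is legal
        return move
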
def __init__(self):
    # def run(start_player=0, is_shown=1):
    self.start_player = 0
    self.is_shown = 1
    menu = Gui_menu()

    # run a gomoku game with AI
    # you can set human vs AI or AI vs AI
    self.n = 5  # rule: five in a row (gomoku)
    if menu.rule == 11:
        width, height = 11, 11  # board width and height
        model_file = 'model_11_11_5/best_policy.model'
    elif menu.rule == 15:
        width, height = 15, 15  # board width and height
        model_file = 'model_15_15_5/best_policy.model'
    else:
        # guard against an unsupported menu selection, which would otherwise
        # leave width, height and model_file unbound below
        raise ValueError('unsupported rule: {}'.format(menu.rule))
    p = os.getcwd()  # current working directory
    model_file = path.join(p, model_file)  # full path to the model file

    board = Board(width=width, height=height, n_in_row=self.n)  # game board
    game = Game(board)

    # pure-MCTS baseline player (constructed but unused here)
    mcts_player = MCTS_pure(5, 400)

    best_policy = PolicyValueNet(board_width=width,
                                 board_height=height,
                                 block=19,
                                 init_model=model_file,
                                 cuda=True)

    # alpha_zero vs alpha_zero
    # best_policy.save_numpy(best_policy.network_all_params)
    # best_policy.load_numpy(best_policy.network_oppo_all_params)

    alpha_zero_player = MCTSPlayer(
        policy_value_function=best_policy.policy_value_fn_random,
        action_fc=best_policy.action_fc_test,
        evaluation_fc=best_policy.evaluation_fc2_test,
        c_puct=5,
        n_playout=400,
        is_selfplay=False)

    # alpha_zero_player_oppo = MCTSPlayer(policy_value_function=best_policy.policy_value_fn_random,
    #                                     action_fc=best_policy.action_fc_test_oppo,
    #                                     evaluation_fc=best_policy.evaluation_fc2_test_oppo,
    #                                     c_puct=5,
    #                                     n_playout=400,
    #                                     is_selfplay=False)

    # human player, input your move in the format: 2,3
    # set start_player=0 for human first

    # play in terminal without GUI
    # human = Human()
    # win = game.start_play(human, alpha_zero_player, start_player=self.start_player, is_shown=self.is_shown, print_prob=True)
    # return win

    # play in GUI
    game.start_play_with_UI(alpha_zero_player)
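
    # Hedged sketch, not in the original: mcts_player is constructed above but
    # never used, which suggests an AI-vs-AI evaluation mode. Assuming
    # game.start_play has the signature shown in the commented-out
    # human-vs-AI call, pitting the pure-MCTS baseline against AlphaZero
    # would look like:
    # win = game.start_play(mcts_player, alpha_zero_player,
    #                       start_player=self.start_player,
    #                       is_shown=self.is_shown, print_prob=True)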