def run_episode(self):
    """Play one game between two ``NNPlayer`` instances and record examples.

    Both players are created with the same initial ``MCTSNode``,
    ``self.predictor`` as network and ``self.n_simulations`` simulations.
    After every ``game.get_next_action()`` call, two entries are recorded for
    the side to move: one built with ``data_augmentation=True`` and one built
    without that argument.  Every entry is a list
    ``[features, policy, color]``.  The loop stops once
    ``game.is_finished(self.iter_max)`` is true.

    Returns:
        The list of recorded examples, after ``set_winner(examples, winner)``
        has been called on it with the result of ``game.get_winner()``.
    """
    examples = []
    board = get_random_board()
    root = MCTSNode(is_initial=True)
    player_settings = {
        "n_simulations": self.n_simulations,
        "current_node": root,
        "janggi_net": self.predictor,
        "temperature_start": 1,
        "temperature_threshold": 30,
        "temperature_end": 0.01,
    }
    player_blue = NNPlayer(Color.BLUE, **player_settings)
    player_red = NNPlayer(Color.RED, **player_settings)
    game = Game(player_blue, player_red, board)

    # Extra keyword arguments for the two recorded variants.  The plain
    # variant passes nothing extra, so callee defaults stay in charge.
    plain = {}
    augmented = {"data_augmentation": True}

    while not game.is_finished(self.iter_max):
        new_action = game.get_next_action()
        game.actions.append(new_action)

        # BLUE records plain then augmented; RED records augmented then plain.
        if game.current_player == Color.BLUE:
            mover, label, variants = player_blue, Color.BLUE, (plain, augmented)
        else:
            mover, label, variants = player_red, Color.RED, (augmented, plain)

        for extra in variants:
            examples.append([
                board.get_features(game.current_player, game.round, **extra),
                mover.current_node.get_policy(game.current_player, **extra),
                label,
            ])

        game.board.apply_action(new_action)
        game.switch_player()
        # Try to reduce memory usage
        game.board.invalidate_action_cache(new_action)
        game.round += 1

    set_winner(examples, game.get_winner())
    return examples
def test_stockfish(self):
    """Run a Stockfish-vs-Stockfish game and print the winner and board.

    A single engine process, created with the ``"level 40 5 0"`` argument,
    is handed to both ``StockfishPlayer`` instances.  The game is run with
    ``run_game(200)``; nothing is asserted, the outcome is only printed.
    """
    start_board = get_random_board()
    engine = get_process_stockfish(start_board, "level 40 5 0")
    blue = StockfishPlayer(Color.BLUE, engine, think_time=-1)
    red = StockfishPlayer(Color.RED, engine, think_time=-1)

    game = Game(blue, red, start_board)
    outcome = game.run_game(200)

    print(outcome)
    print(repr(game.board))
def fight(player_blue, player_red, iter_max, print_board=False):
    """Play one game between the two given players and report the outcome.

    Args:
        player_blue: Player passed to ``Game`` as the first player.
        player_red: Player passed to ``Game`` as the second player.
        iter_max: Forwarded as the first argument of ``Game.run_game``.
        print_board: Forwarded to ``Game.run_game`` as ``print_board``.

    Returns:
        Whatever ``Game.run_game`` returned.  The winner, both scores, the
        board (``repr`` and ``str``) and ``game.to_uci_usi()`` are printed
        before returning.
    """
    board = get_random_board()
    game = Game(player_blue, player_red, board)
    winner = game.run_game(iter_max, print_board=print_board)

    print("Winner:", winner)
    for caption, color in (("Score BLUE:", Color.BLUE), ("Score RED:", Color.RED)):
        print(caption, board.get_score(color))
    print(repr(board))
    print(board)
    print(game.to_uci_usi())
    return winner
def run_episode_stockfish(args):
    """Play one Stockfish-vs-Stockfish episode and return the dumped game.

    Args:
        args: Used directly as ``iter_max`` and forwarded to ``run_game``.

    Returns:
        The result of ``game.dumps()`` for the game returned by ``run_game``.
    """
    print("Starting episode", current_process().name)
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by a system clock adjustment while the episode is running.
    begin_time = time.perf_counter()
    iter_max = args
    board = get_random_board()
    # Both players are handed the same engine process object.
    process = get_process_stockfish(board)
    player_blue = StockfishPlayer(Color.BLUE, process, think_time=2)
    player_red = StockfishPlayer(Color.RED, process, think_time=2)
    game = run_game(board, player_blue, player_red, iter_max)
    print("Time Episode: ", time.perf_counter() - begin_time)
    return game.dumps()
def test_single_action_random(self):
    """Request a single action from a game of two ``RandomMCTSPlayer``s.

    Both players use 800 simulations and share one ``MCTSNode``.  Nothing is
    asserted; the test only checks that ``get_next_action()`` completes.
    """
    mcts_settings = {"n_simulations": 800, "current_node": MCTSNode()}
    blue = RandomMCTSPlayer(Color.BLUE, **mcts_settings)
    red = RandomMCTSPlayer(Color.RED, **mcts_settings)

    game = Game(blue, red, get_random_board())
    game.get_next_action()
def test_random_vs_random(self):
    """Play two ``RandomMCTSPlayer``s against each other and check the winner.

    Both players use 400 simulations and share one ``MCTSNode``.  The game
    is run with ``run_game(200)``; the winner must be ``Color.BLUE`` or
    ``Color.RED``.  The game is then printed via ``game.to_json(node)``.
    """
    shared_node = MCTSNode()
    mcts_settings = {"n_simulations": 400, "current_node": shared_node}
    blue = RandomMCTSPlayer(Color.BLUE, **mcts_settings)
    red = RandomMCTSPlayer(Color.RED, **mcts_settings)

    # The game is driven directly (not through fight()) so that the Game
    # object is still available for the to_json() call below.
    game = Game(blue, red, get_random_board())
    outcome = game.run_game(200)

    self.assertIn(outcome, [Color.BLUE, Color.RED])
    print(game.to_json(shared_node))
def test_single_action_nn(self):
    """Request a single action from a game of two ``NNPlayer``s.

    Each player gets its own freshly constructed ``JanggiNetwork`` and uses
    800 simulations with all temperature settings at their low values.
    Nothing is asserted; the test only checks that ``get_next_action()``
    completes.
    """
    search_settings = {
        "n_simulations": 800,
        "temperature_start": 0.01,
        "temperature_threshold": 30,
        "temperature_end": 0.01,
    }
    # One network instance per player, created in BLUE-then-RED order.
    blue = NNPlayer(Color.BLUE, janggi_net=JanggiNetwork(), **search_settings)
    red = NNPlayer(Color.RED, janggi_net=JanggiNetwork(), **search_settings)

    game = Game(blue, red, get_random_board())
    game.get_next_action()
def run_episode_raw_not_nn(args):
    """Play one episode between two ``RandomMCTSPlayer``s and return its JSON.

    Args:
        args: A two-item sequence ``(n_simulations, iter_max)``.
            ``n_simulations`` is given to both players and ``iter_max`` is
            forwarded to ``run_game``.

    Returns:
        The result of ``game.to_json(initial_node)``, where ``initial_node``
        is the ``MCTSNode`` both players were created with.
    """
    print("Starting episode", current_process().name)
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by a system clock adjustment while the episode is running.
    begin_time = time.perf_counter()
    n_simulations, iter_max = args
    board = get_random_board()
    initial_node = MCTSNode(is_initial=True)
    player_blue = RandomMCTSPlayer(Color.BLUE,
                                   n_simulations=n_simulations,
                                   current_node=initial_node,
                                   temperature_start=1,
                                   temperature_threshold=30,
                                   temperature_end=0.01)
    player_red = RandomMCTSPlayer(Color.RED,
                                  n_simulations=n_simulations,
                                  current_node=initial_node,
                                  temperature_start=1,
                                  temperature_threshold=30,
                                  temperature_end=0.01)
    game = run_game(board, player_blue, player_red, iter_max)
    print("Time Episode: ", time.perf_counter() - begin_time)
    return game.to_json(initial_node)