def play_with_agents(self, agt1, agt2):
    """Play self.G games of agt1 vs agt2 and report agt1's win count.

    Fix over the previous version: `player_turn` was never toggled, so the
    `else` branch was unreachable and agt2 never actually played a move.
    The turn now alternates after every executed move.

    Args:
        agt1: network playing as player 1 (wins are counted for this agent).
        agt2: network playing as player 2.

    Side effects: prints a progress bar, the final score line, and the last
    game's board. Relies on module-level `num_search_games` and `boardsize`.
    """
    player_turn = 1
    player1_wins = 0
    for i in range(self.G):
        # Simple in-place progress bar, overwritten each iteration via \r.
        print("[{}>{}]".format("-" * i, "." * (self.G - i - 1)), end="\r")
        sm = StateManager(5)
        agent = MCTS(exploration_rate=1, anet=agt1)
        game = sm.create_game()
        # chanceOfRandom=0.0: evaluation games are fully greedy/deterministic.
        tree = Tree(game, chanceOfRandom=0.0)
        state = tree.root
        while not sm.is_finished():
            # Point the shared MCTS agent at whichever net is to move.
            agent.anet = agt1 if player_turn == 1 else agt2
            best_child = agent.uct_search(tree, state, num_search_games)
            game.execute_move(best_child.move)
            state = best_child
            # Alternate turns — this was missing and left agt2 idle.
            player_turn = 3 - player_turn
        if sm.get_winner() == 1:
            player1_wins += 1
    print("{} won {}/{} against {}.".format(agt1.name, player1_wins, self.G, agt2.name))
    print(np.reshape(sm.game.board, (boardsize, boardsize)))
import numpy as np
import collections


def train_anet(anet, RBUF):
    """Train the actor network on one random minibatch drawn from RBUF.

    Args:
        anet: the actor network; must expose a `train(minibatch)` method.
        RBUF: replay buffer of training cases (sampled without replacement).
    """
    # Creates a minibatch of the RBUF and trains the anet on the minibatch
    # Cap the batch at 32 so sampling never exceeds the buffer's size.
    batch_size = min(len(RBUF), 32)
    # NOTE(review): `random` is used here but not imported in this chunk —
    # confirm `import random` exists elsewhere in the file.
    minibatch = random.sample(RBUF, batch_size)
    anet.train(minibatch)


""" Initializations """
# NOTE(review): this script section depends on module-level names not visible
# in this chunk (board_size, rbuf_max_size, offset, num_of_games,
# num_search_games, verbose) — presumably config constants; verify.
# NOTE(review): `board_size` here vs `boardsize` used elsewhere in the file —
# confirm these are not meant to be the same variable.
anet = ANET(size=board_size)
agent = MCTS(exploration_rate=1, anet=anet)
sm = StateManager()
game = sm.create_game()
tree = Tree(game)
win_stats = []
# Bounded replay buffer: old training cases are evicted once maxlen is hit.
RBUF = collections.deque(maxlen=rbuf_max_size)
# `offset` allows resuming training from a previous episode count.
for i in range(offset, num_of_games + 1):
    print("Episode: {}/{}".format(i, num_of_games))
    state = tree.root
    while (not sm.is_game_over()):
        # Each search yields both the chosen child and a (state, target)
        # training case for the replay buffer.
        best_child, training_case = agent.uct_search(tree, state, num_search_games)
        RBUF.append(training_case)
        sm.move(best_child.move)
        state = best_child
    if verbose and i == num_of_games:
        # (truncated: the body of this conditional continues beyond this chunk)