def play_with_agents(self, agt1, agt2):
    """Play ``self.G`` games between two MCTS-guided agents and report wins.

    ``agt1`` moves as player 1 and ``agt2`` as player 2; a single MCTS
    searcher is reused with its network swapped to the player to move.

    Bug fixed: the original initialized ``player_turn = 1`` and never
    toggled it, so the ``agt2`` branch was unreachable and agt1 chose
    every move for both sides. The turn is now alternated after each
    executed move and reset at the start of every game.

    Returns the number of games agt1 (player 1) won, mirroring
    ``play_series``.
    """
    player1_wins = 0
    for i in range(self.G):
        # In-place textual progress bar.
        print("[{}>{}]".format("-" * i, "." * (self.G - i - 1)), end="\r")
        sm = StateManager(5)
        agent = MCTS(exploration_rate=1, anet=agt1)
        game = sm.create_game()
        tree = Tree(game, chanceOfRandom=0.0)
        state = tree.root
        player_turn = 1  # agt1 always opens as player 1
        while not sm.is_finished():
            # Point the searcher at the network of the player to move.
            agent.anet = agt1 if player_turn == 1 else agt2
            best_child = agent.uct_search(tree, state, num_search_games)
            game.execute_move(best_child.move)
            state = best_child
            player_turn = 2 if player_turn == 1 else 1  # alternate turns
        if sm.get_winner() == 1:
            player1_wins += 1
    print("{} won {}/{} against {}.".format(agt1.name, player1_wins, self.G, agt2.name))
    print(np.reshape(sm.game.board, (boardsize, boardsize)))
    return player1_wins
def play_series(self, agt1, agt2):
    """Pit two ANETs directly against each other for ``self.G`` games.

    agt1 predicts moves for player 1 and agt2 for player 2. The first
    move (or two) of each game is chosen uniformly at random to vary
    openings. Returns the number of games player 1 (agt1) won.
    """
    player1_wins = 0
    for game_idx in range(self.G):
        # In-place textual progress indicator.
        print("[{}>{}]".format("-" * game_idx, "." * (self.G - game_idx - 1)), end="\r")
        sm = StateManager(5)
        while not sm.is_finished():
            to_move = sm.game.get_current_player()
            # Network input: current player prepended to the flattened board.
            net_input = np.array(
                [np.concatenate((to_move, sm.game.board), axis=None)])
            acting_net = agt1 if to_move == 1 else agt2
            predictions = acting_net.predict(net_input)[0]
            legal_moves = sm.get_legal_moves()
            if len(sm.game.executedMoves) <= 1:
                # Randomized opening so games don't all repeat.
                chosen = random.choice(legal_moves)
            else:
                chosen = self.choose_best_move(predictions, legal_moves)
            sm.execute_move(chosen)
        if sm.get_winner() == 1:
            player1_wins += 1
    print("{} won {}/{} against {}.".format(agt1.name, player1_wins, self.G, agt2.name))
    print(np.reshape(sm.game.board, (boardsize, boardsize)))
    print(sm.game.executedMoves)
    return player1_wins
def rollout(self, leaf):
    """Play a game out from ``leaf`` to a terminal state and score it.

    The very first rollout move is chosen uniformly at random; all later
    moves come from the policy network (``self.nn``). Returns 1.0 if
    player 1 wins and -1.0 if player 2 wins.
    """
    # leaf.state is a player flag plus a size*size board flattened out.
    size = int((len(leaf.state) - 1) ** 0.5)
    leaf_state = StateManager(size, leaf.state)
    first_iteration = True
    while True:
        # Terminal checks first, so an already-finished leaf scores immediately.
        if leaf_state.player1_won():
            return 1.0
        if leaf_state.player2_won():
            return -1.0
        if leaf_state.is_finished():
            # Finished with no winner should be impossible; bail out loudly.
            print("No winner error")
            quit()
        if first_iteration:
            possible_moves = leaf_state.get_moves()
            move = possible_moves[random.randint(0, len(possible_moves) - 1)]
            first_iteration = False
        else:
            move = leaf_state.convert_to_move(
                self.nn.get_action(leaf_state.string_representation()))
        leaf_state.make_move(move)
""" Initializations """ anet = ANET(boardsize) agent = MCTS(exploration_rate=1, anet=anet) sm = StateManager(boardsize) game = sm.create_game() tree = Tree(game, 1.0) win_stats = [] # TODO: Save interval for ANET parameters RBUF = collections.deque(maxlen=500) for i in range(1, num_of_games + 1): progress_bar(i + 1) state = tree.root while (not sm.is_finished()): player = sm.game.get_current_player() best_child = agent.uct_search(tree, state, num_search_games) distribution = get_distribution(best_child.parent) RBUF.append((np.concatenate((player, best_child.game.board), axis=None), distribution)) x_train, y_train = zip(*RBUF) print("Move: ", best_child.move) game.execute_move(best_child.move) state = best_child # Train ANET on a random minibatch from RBUF train_anet(anet, RBUF) if i % save_interval == 0: anet.model.save('./trainedModels/model-{}.h5'.format(i))