def play_series(self, agt1, agt2):
    """Play a series of self.G games between two agents and report the score.

    agt1 always plays as player 1 and agt2 as player 2. To diversify
    openings, the first topp_random_moves plies per player (2x total) are
    chosen uniformly at random; after that each agent plays the legal move
    its network scores highest. Before the final game of the series the
    user is asked whether that game should be printed move by move.
    """
    wins_for_first = 0
    for game_no in range(self.G):
        # One-line progress bar, redrawn in place via carriage return.
        print("[{}>{}]".format("-" * game_no, "." * (self.G - game_no - 1)), end="\r")
        board = StateManager(size=self.get_size())
        # Only the very last game of the series can be shown in full.
        show_moves = (
            input("Print the last game? (Y/n): ")
            if game_no == self.G - 1
            else "n"
        )
        moves_played = 0
        while not board.is_game_over():
            nn_state = board.get_nn_state()
            # First element of the network state encodes whose turn it is.
            mover = agt1 if nn_state[0] == 1 else agt2
            scores = mover.predict(nn_state)
            options = board.get_legal_moves()
            # Random opening phase: both players combined get
            # topp_random_moves * 2 random plies per game.
            if moves_played < topp_random_moves * 2:
                chosen = random.choice(options)
            else:
                chosen = options[np.argmax(scores)]
            board.move(chosen)
            moves_played += 1
            if show_moves.lower() == "y":
                board.print_board()
        if board.get_winner() == 1:
            wins_for_first += 1
    print("{} won {}/{} against {}.".format(agt1.name, wins_for_first, self.G, agt2.name))
# RL training loop fragment: self-play episodes drive MCTS (uct_search),
# collected training cases fill a bounded replay buffer, and the network
# is trained and periodically checkpointed.
# NOTE(review): `game`, `sm`, `agent`, `anet`, `offset`, `num_of_games`,
# `num_search_games`, `verbose`, `save_interval`, `save_dir`, `board_size`,
# `rbuf_max_size` and `train_anet` are all defined outside this view.
tree = Tree(game)
# NOTE(review): win_stats is not used within this fragment — presumably
# appended to further down; verify before removing.
win_stats = []
# Replay buffer: deque with maxlen silently evicts the oldest cases once full.
RBUF = collections.deque(maxlen=rbuf_max_size)
for i in range(offset, num_of_games + 1):
    print("Episode: {}/{}".format(i, num_of_games))
    # Start each episode from the root of the search tree.
    state = tree.root
    # Play one full game: each step runs MCTS from the current node,
    # records the (state, distribution) training case, and advances
    # both the actual state manager and the tree position.
    # NOTE(review): `sm` is not visibly reset between episodes here —
    # confirm it is re-created elsewhere per episode.
    while (not sm.is_game_over()):
        best_child, training_case = agent.uct_search(tree, state, num_search_games)
        RBUF.append(training_case)
        sm.move(best_child.move)
        state = best_child
    if verbose and i == num_of_games:
        sm.print_board()
    # Save the untrained (or resumed) model once, at the first episode,
    # so there is a baseline checkpoint before any training this run.
    if save_interval > 0 and i == offset:
        anet.model.save("./{}/size_{}-ep_{}.h5".format(save_dir, board_size, i))
        print("Saved model to: ./{}/size_{}-ep_{}.h5".format(
            save_dir, board_size, i))
    # Train on the replay buffer after every episode.
    train_anet(anet, RBUF)
    # Periodic checkpoint every save_interval episodes.
    if save_interval > 0 and (i % save_interval == 0):
        anet.model.save("./{}/size_{}-ep_{}.h5".format(save_dir, board_size, i))
        print("Saved model to: ./{}/size_{}-ep_{}.h5".format(
            save_dir, board_size, i))