Example 1
    def play_with_agents(self, agt1, agt2):
        """Play ``self.G`` games between two ANET-guided MCTS agents.

        Both agents share one MCTS searcher; only the network behind it
        (``agent.anet``) is swapped depending on whose turn it is.

        Bug fix: ``player_turn`` was never toggled, so the original code
        always searched with ``agt1``'s network and ``agt2`` never played.

        Args:
            agt1: ANET playing as player 1.
            agt2: ANET playing as player 2.
        """
        player1_wins = 0

        for i in range(self.G):
            # In-place textual progress bar, overwritten each game via \r.
            print("[{}>{}]".format("-" * i, "." * (self.G - i - 1)), end="\r")
            sm = StateManager(5)
            agent = MCTS(exploration_rate=1, anet=agt1)
            game = sm.create_game()
            tree = Tree(game, chanceOfRandom=0.0)

            player_turn = 1  # reset each game; player 1 always opens
            state = tree.root
            while not sm.is_finished():
                # Search with the network belonging to the player to move.
                agent.anet = agt1 if player_turn == 1 else agt2
                best_child = agent.uct_search(tree, state,
                                              num_search_games)

                game.execute_move(best_child.move)
                state = best_child
                player_turn = 2 if player_turn == 1 else 1  # alternate turns

            if sm.get_winner() == 1:
                player1_wins += 1

        print("{} won {}/{} against {}.".format(agt1.name, player1_wins,
                                                self.G, agt2.name))
        # Show the final board of the last game played.
        print(np.reshape(sm.game.board, (boardsize, boardsize)))
Example 2
    def play_series(self, agt1, agt2):
        """Play ``self.G`` games where each agent picks moves directly
        from its network's predictions (no tree search).

        The first two plies of every game are random to diversify openings.

        Args:
            agt1: network playing as player 1.
            agt2: network playing as player 2.

        Returns:
            Number of games won by ``agt1`` (player 1).
        """
        player1_wins = 0
        for game_no in range(self.G):
            # Overwriting progress bar, one tick per finished game.
            print("[{}>{}]".format("-" * game_no,
                                   "." * (self.G - game_no - 1)), end="\r")
            sm = StateManager(5)

            while not sm.is_finished():
                to_move = sm.game.get_current_player()
                # Network input: [player flag, flattened board] as one row.
                net_input = np.array(
                    [np.concatenate((to_move, sm.game.board), axis=None)])
                acting_net = agt1 if to_move == 1 else agt2
                predictions = acting_net.predict(net_input)[0]

                legal_moves = sm.get_legal_moves()
                # First two plies are uniform random; afterwards greedy
                # with respect to the network's move distribution.
                if len(sm.game.executedMoves) <= 1:
                    chosen_move = random.choice(legal_moves)
                else:
                    chosen_move = self.choose_best_move(predictions,
                                                        legal_moves)
                sm.execute_move(chosen_move)

            if sm.get_winner() == 1:
                player1_wins += 1

        print("{} won {}/{} against {}.".format(agt1.name, player1_wins,
                                                self.G, agt2.name))
        print(np.reshape(sm.game.board, (boardsize, boardsize)))
        print(sm.game.executedMoves)
        return player1_wins
 def rollout(self, leaf):
     """Simulate a game to completion from *leaf* and return its score.

     The very first move is uniformly random (to diversify rollouts);
     every subsequent move is chosen by the policy network ``self.nn``.

     Args:
         leaf: tree node whose ``state`` is a flat vector of
             1 player flag + size*size board cells.

     Returns:
         1.0 if player 1 wins the simulated game, -1.0 if player 2 wins.

     Raises:
         RuntimeError: if the game finishes with no winner (corrupt state).
     """
     # Recover the board side length from the flat state vector.
     size = int((len(leaf.state) - 1) ** 0.5)
     leaf_state = StateManager(size, leaf.state)
     first_iteration = True
     while True:
         if leaf_state.player1_won():
             return 1.0
         if leaf_state.player2_won():
             return -1.0
         if leaf_state.is_finished():
             # Originally print + quit(); raising keeps the error
             # catchable and loggable instead of killing the process.
             raise RuntimeError("No winner error")
         if first_iteration:
             possible_moves = leaf_state.get_moves()
             move = random.choice(possible_moves)
             first_iteration = False
         else:
             move = leaf_state.convert_to_move(
                 self.nn.get_action(leaf_state.string_representation()))
         leaf_state.make_move(move)
Example 4
""" Initializations """
anet = ANET(boardsize)
agent = MCTS(exploration_rate=1, anet=anet)
sm = StateManager(boardsize)
game = sm.create_game()
tree = Tree(game, 1.0)
win_stats = []

# TODO: Save interval for ANET parameters
RBUF = collections.deque(maxlen=500)

for i in range(1, num_of_games + 1):
    progress_bar(i + 1)
    state = tree.root

    while (not sm.is_finished()):
        player = sm.game.get_current_player()
        best_child = agent.uct_search(tree, state, num_search_games)
        distribution = get_distribution(best_child.parent)
        RBUF.append((np.concatenate((player, best_child.game.board),
                                    axis=None), distribution))
        x_train, y_train = zip(*RBUF)
        print("Move: ", best_child.move)
        game.execute_move(best_child.move)
        state = best_child

    # Train ANET on a random minibatch from RBUF
    train_anet(anet, RBUF)

    if i % save_interval == 0:
        anet.model.save('./trainedModels/model-{}.h5'.format(i))