Example #1
0
def human_play(net_checkpoint):
    """
    Play one interactive console game against a network.

    The network always makes the first move (it plays as ``first_player``);
    the human then answers by typing the integer index of the action to
    play. The loop alternates until ``game.is_finished()`` is true.

    net_checkpoint: checkpoint identifier passed straight to ``NNet``.
    """
    globals.set_gpu_visible(True)
    # NOTE(review): these imports are kept after set_gpu_visible(), presumably
    # so GPU visibility is configured before the network code loads - confirm.
    from time import sleep
    from MCTS import MCTS
    from NNet import NNet
    from game import GameState

    net = NNet(net_checkpoint)

    # init environment
    game, first_player = GameState.generate_training_game(randint(0, 10000))
    mcts = MCTS(net, game.copy())

    player = first_player
    # Game loop
    while True:

        # net turn
        action = np.argmax(mcts.get_pi(player))
        game.next_state(action, player)

        print('\n\n\nnet MOVED\n' + str(game))
        sleep(0.7)

        if game.is_finished():
            break

        # human turn: keep prompting until the input parses as an integer,
        # instead of crashing the whole game on a typo
        while True:
            try:
                action = int(input(': '))
                break
            except ValueError:
                print('Invalid input, please enter the move as an integer')
        game.next_state(action, -player)

        print('\n\n\nyou MOVED\n' + str(game))
        sleep(0.7)

        if game.is_finished():
            break

        # Advance the search tree to the position reached by the human move,
        # expanding the root first if it has no children yet
        if not mcts.root.has_children():
            mcts.search(mcts.root, -player)
        assert (mcts.root.children[action])
        mcts.root = mcts.root.children[action]

    print('Game over')
Example #2
0
def evaluate_net_process(net, net_opponent, chunk_counter, games_per_chunk,
                         results_queue):
    """
    Worker process that plays evaluation games between two networks.

    'net' always moves first in every game. Each game contributes 1 to the
    score if 'net' wins, -1 if it loses and 0 for a tie. When no chunks are
    left, the accumulated score is put on results_queue.

    net: checkpoint identifier for NNet; when falsy,
        globals.get_last_iteration() + 1 is used instead.
    net_opponent: checkpoint identifier for the opposing NNet.
    chunk_counter: shared counter exposing get_lock() and .value; it is
        decremented to claim the next chunk of games.
    games_per_chunk: number of games played for each claimed chunk.
    results_queue: queue that receives this worker's total score.

    Always returns 0; the score is delivered through results_queue.
    """
    globals.set_gpu_visible(False)
    from multiprocessing import current_process as cp
    from time import sleep
    from MCTS import MCTS
    from NNet import NNet

    net = NNet(net or (globals.get_last_iteration() + 1))
    net_opponent = NNet(net_opponent)

    result = 0

    while True:

        # Claim the next chunk: decrement and read under the lock so that
        # no two workers get the same chunk index
        with chunk_counter.get_lock():
            chunk_counter.value -= 1
            chunk_index = chunk_counter.value

        if chunk_index < 0:
            break

        print('--', cp().name, '-- Starting chunk', chunk_index)

        for game_it in range(games_per_chunk * chunk_index,
                             games_per_chunk * (chunk_index + 1)):

            # init environment: both searches start from a copy of the game
            game, first_player = GameState.generate_training_game(game_it)
            mcts = MCTS(net, game.copy())
            mcts_opponent = MCTS(net_opponent, game.copy())

            player = first_player
            # Game loop
            while True:
                # Net turn
                action = np.argmax(mcts.get_pi(player))
                game.next_state(action, player)
                if train_args.show_moves:
                    print('\n\n\nnet MOVED\n' + str(game.board))
                    sleep(0.7)

                if game.is_finished():
                    break

                # Advance the opponent's tree along the move just played,
                # expanding its root first if it has no children yet
                if not mcts_opponent.root.has_children():
                    mcts_opponent.search(mcts_opponent.root, player)
                assert (mcts_opponent.root.children[action])
                mcts_opponent.root = mcts_opponent.root.children[action]

                # Net opponent turn
                action = np.argmax(mcts_opponent.get_pi(-player))
                game.next_state(action, -player)
                if train_args.show_moves:
                    print('\n\n\nnet_opponent MOVED\n' + str(game.board))
                    sleep(0.7)

                if game.is_finished():
                    break

                # Same as above, for the tree of 'net'
                if not mcts.root.has_children():
                    mcts.search(mcts.root, -player)
                assert (mcts.root.children[action])
                mcts.root = mcts.root.children[action]

            # Update overall result ('net' was the first to move). The tie is
            # taken from the winner itself (0 means tie, as in
            # selfplay_process) rather than from the absence of valid moves,
            # so a game won on the very last available move is still scored.
            winner = game.get_winner()
            if winner:
                result += 1 if winner == first_player else -1

            print('--', cp().name, '-- selfplay match ended')

    print('--', cp().name, '-- Finished')

    results_queue.put(result)

    return 0
Example #3
0
def selfplay_process(chunk_counter, games_per_chunk, all_moves):
    """
    Worker process that generates training examples through self-play.

    Chunks of games_per_chunk games are claimed from chunk_counter until it
    drops below zero. Every move of every game is stored as
    [canonical board, pi, value], where value is 1 / -1 from the point of
    view of the player to move, or 0 when the game is a tie.

    chunk_counter: shared counter exposing get_lock() and .value; it is
        decremented to claim the next chunk of games.
    games_per_chunk: number of games played for each claimed chunk.
    all_moves: queue that receives the list of all examples produced by
        this worker once it has finished.

    Always returns 0; the examples are delivered through all_moves.
    """
    globals.set_gpu_visible(False)
    from multiprocessing import current_process as cp
    from MCTS import MCTS
    from NNet import NNet
    from pickle import dump
    from os import makedirs, path
    from time import sleep

    net = NNet('best')

    examples = []  #[board, pi, winner] moves of played games

    while True:

        # Claim the next chunk: decrement and read under the lock so that
        # no two workers get the same chunk index
        with chunk_counter.get_lock():
            chunk_counter.value -= 1
            chunk_index = chunk_counter.value

        if chunk_index < 0:
            break

        print('--', cp().name, '-- Starting chunk', chunk_index)

        for game_it in range(games_per_chunk * chunk_index,
                             games_per_chunk * (chunk_index + 1)):

            game_moves = []  #[board, pi, winner] moves of single game

            # Init game environment
            game, first_player = GameState.generate_training_game(game_it)
            mcts = MCTS(net, game.copy())

            # Game loop
            player = first_player
            while not game.is_finished():
                pi = mcts.get_pi(player)
                if train_args.show_moves:
                    print(str(game) + '\n\n\n')
                    sleep(1)
                assert (not any(np.isnan(pi)))  #debug only
                game_moves.append([game.canon_board(player), pi,
                                   0])  #winner is set temporary to 0
                game.next_state(np.argmax(pi), player)
                player *= -1

            print('--', cp().name, '-- selfplay match ended')

            # Iterate over game_moves to set the right winner after the game.
            # Moves alternate between the two players, so the sign flips at
            # every step, starting from first_player's point of view.
            winner = game.get_winner()
            if winner:  #winner==0: game is tie
                winner = 1 if winner == first_player else -1
                for move in game_moves:
                    move[2] = winner
                    winner *= -1

            # Add all moves from the game to the examples (tied games are
            # kept too, with value 0)
            examples += game_moves

        # Save on disk the examples gathered so far by this process. In case
        # of stop in the middle of an iteration, these data are not restored
        # automatically, need to do that by hand. Every .pickle file holds
        # [chunk_index, examples], where examples is a python list in the
        # form [[s, pi, v], [s, pi, v], ...]; the file of a process is
        # overwritten after each chunk with its full list.
        tmp_dir = 'tmp/'
        # exist_ok avoids a crash when another worker creates the directory
        # between the check and the creation
        makedirs(tmp_dir, exist_ok=True)
        file_name = path.join(tmp_dir, 'examples_{}.pickle'.format(cp().name))
        with open(file_name, 'wb') as f:
            dump([chunk_index, examples], f)

    print('--', cp().name, '-- Finished')

    # Append moves to the queue
    all_moves.put(examples)

    return 0