def self_play(self, first_color):
        """
        Run a single self-play episode and return its training samples.

        A new Gomoku game is created with ``first_color`` and one MCTS built
        from "./models/checkpoint.pt" selects every move. On each turn the
        board, the last move, the colour to move and the search
        probabilities are recorded once per variant returned by
        ``get_symmetries``. The move actually played is drawn from the
        search probabilities (weight 0.75) mixed with Dirichlet noise over
        the legal moves (weight 0.25).

        When the game ends, every recorded sample is returned as a tuple
        ``(board, last_action, cur_player, prob, value)`` where
        ``value = cur_player * winner``.
        """

        samples = []
        game = Gomoku(self.n, self.n_in_row, first_color)
        searcher = MCTS("./models/checkpoint.pt", self.thread_pool_size,
                        self.c_puct, self.num_mcts_sims, self.c_virtual_loss,
                        self.action_size, self.mcts_use_gpu)

        step = 0
        while True:
            step += 1

            # the configured temperature is only used for the first
            # `explore_num` moves; afterwards 0 is passed to the search
            if step <= self.explore_num:
                temperature = self.temp
            else:
                temperature = 0
            policy = np.array(
                list(searcher.get_action_probs(game, temperature)))

            # record one sample per symmetric variant of the position
            # NOTE(review): last_action is stored unchanged for every
            # variant -- confirm the consumer does not expect it transformed
            position = tuple_2d_to_numpy_2d(game.get_board())
            previous_move = game.get_last_move()
            mover = game.get_current_color()
            samples.extend(
                [sym_board, previous_move, mover, sym_policy]
                for sym_board, sym_policy in self.get_symmetries(
                    position, policy))

            # mix dirichlet noise into the entries of the legal moves
            legal = list(game.get_legal_moves())
            noise = 0.25 * np.random.dirichlet(
                self.dirichlet_alpha * np.ones(np.count_nonzero(legal)))

            mixed = 0.75 * policy
            legal_idx = [i for i in range(len(mixed)) if legal[i] == 1]
            mixed[legal_idx] += noise[:len(legal_idx)]
            mixed /= np.sum(mixed)
            move = np.random.choice(len(mixed), p=mixed)

            # play the move and advance the search tree
            game.execute_move(move)
            searcher.update_with_move(move)

            # stop as soon as the game is over
            finished, winner = game.get_game_status()
            if finished == 1:
                # b, last_action, cur_player, p, v
                return [(b, a, color, p, color * winner)
                        for b, a, color, p in samples]
    def play_with_human(self,
                        human_first=True,
                        checkpoint_name="best_checkpoint"):
        """
        Play one interactive game between a human (through the GUI) and the
        network loaded from ``./models/<checkpoint_name>.pt``.

        The GUI loop runs in a background thread for the whole game. The
        search uses twice the configured MCTS threads and four times the
        configured simulations. The result is printed once the game ends,
        and the call then blocks until the GUI thread exits.

        Args:
            human_first: when True the human's colour moves first,
                otherwise the opposite colour does.
            checkpoint_name: base name (without ".pt") of the model file
                under ``./models`` to play against.
        """
        t = threading.Thread(target=self.gomoku_gui.loop)
        t.start()

        # load the requested model (previously the name was ignored and
        # best_checkpoint.pt was always loaded)
        libtorch_best = NeuralNetwork('./models/' + checkpoint_name + '.pt',
                                      self.libtorch_use_gpu,
                                      self.num_mcts_threads * 2)
        mcts_best = MCTS(libtorch_best, self.num_mcts_threads * 2, self.c_puct,
                         self.num_mcts_sims * 4, self.c_virtual_loss,
                         self.action_size)

        # create gomoku game
        human_color = self.gomoku_gui.get_human_color()
        first_color = human_color if human_first else -human_color
        gomoku = Gomoku(self.n, self.n_in_row, first_color)

        # indexed with colour + 1, so colour -1 -> slot 0 and 1 -> slot 2
        players = ["alpha", None, "human"
                   ] if human_color == 1 else ["human", None, "alpha"]
        player_index = first_color

        while True:
            player = players[player_index + 1]

            # select move
            if player == "alpha":
                prob = mcts_best.get_action_probs(gomoku)
                best_move = int(np.argmax(np.array(list(prob))))
                self.gomoku_gui.execute_move(player_index, best_move)
            else:
                self.gomoku_gui.set_is_human(True)
                # poll until the GUI clears the flag, i.e. the human moved
                while self.gomoku_gui.get_is_human():
                    time.sleep(0.1)
                best_move = self.gomoku_gui.get_human_move()

            # execute move
            gomoku.execute_move(best_move)

            # check game status
            ended, winner = gomoku.get_game_status()
            if ended == 1:
                break

            # update tree search
            mcts_best.update_with_move(best_move)

            # next player
            player_index = -player_index

        # a finished game without a winning colour is a draw, not an AI win
        if winner == human_color:
            print("HUMAN WIN")
        elif winner == -human_color:
            print("ALPHA ZERO WIN")
        else:
            print("DRAW")

        t.join()
Beispiel #3
0
    def _contest(self, network1, network2, first_player, show):
        """
        Play one game between two networks and return the winner value
        reported by ``Gomoku.get_game_status``.

        ``network1`` drives the search used for colour 1 and ``network2``
        the one used for colour -1; ``first_player`` is the colour that
        moves first. Each turn the move with the highest search
        probability is played. When ``show`` is true the GUI is reset
        before the game and every move is forwarded to it.
        """
        def build_search(network):
            return MCTS(network, self.num_mcts_threads, self.c_puct,
                        self.num_mcts_sims, self.c_virtual_loss,
                        self.action_size)

        search_one = build_search(network1)
        search_two = build_search(network2)

        # looked up with colour + 1: -1 -> slot 0, 1 -> slot 2
        search_by_color = [search_two, None, search_one]
        to_move = first_player
        board = Gomoku(self.n, self.n_in_row, first_player)
        if show:
            self.gomoku_gui.reset_status()

        while True:
            active = search_by_color[to_move + 1]

            # greedy choice: the most probable move of the active search
            probs = active.get_action_probs(board)
            move = int(np.argmax(np.array(list(probs))))

            board.execute_move(move)
            if show:
                self.gomoku_gui.execute_move(to_move, move)

            finished, winner = board.get_game_status()
            if finished == 1:
                return winner

            # both search trees have to follow the move that was played
            for search in (search_one, search_two):
                search.update_with_move(move)

            to_move = -to_move
Beispiel #4
0
    def self_play(self, first_color, libtorch, show):
        """
        Run a single self-play episode and return its training samples.

        Two MCTS instances built on the same ``libtorch`` network take
        turns on a Gomoku game created with ``first_color``. On each turn
        one sample is recorded per variant returned by ``get_symmetries``.
        The move played is drawn from the search probabilities (weight
        0.9) mixed with Dirichlet noise over the legal moves (weight 0.1).
        When ``show`` is true the GUI is reset first and receives every
        move.

        When the game ends, every recorded sample is returned as a tuple
        ``(board, last_action, cur_player, prob, value)`` where
        ``value = cur_player * winner``.
        """
        samples = []

        def build_search():
            return MCTS(libtorch, self.num_mcts_threads, self.c_puct,
                        self.num_mcts_sims, self.c_virtual_loss,
                        self.action_size)

        search_one = build_search()
        search_two = build_search()
        # looked up with index + 1: -1 -> slot 0, 1 -> slot 2
        searches = [search_two, None, search_one]
        turn = 1

        game = Gomoku(self.n, self.n_in_row, first_color)

        if show:
            self.gomoku_gui.reset_status()

        step = 0
        while True:
            step += 1
            active = searches[turn + 1]

            # the configured temperature is only used for the first
            # `num_explore` moves; afterwards 0 is passed to the search
            temperature = self.temp if step <= self.num_explore else 0
            policy = np.array(list(active.get_action_probs(game, temperature)))

            # record one sample per symmetric variant of the position
            position = tuple_2d_to_numpy_2d(game.get_board())
            previous_move = game.get_last_move()
            mover = game.get_current_color()
            samples.extend(
                [sym_board, sym_move, mover, sym_policy]
                for sym_board, sym_policy, sym_move in self.get_symmetries(
                    position, policy, previous_move))

            # mix dirichlet noise into the entries of the legal moves
            legal = list(game.get_legal_moves())
            noise = 0.1 * np.random.dirichlet(
                self.dirichlet_alpha * np.ones(np.count_nonzero(legal)))

            policy = 0.9 * policy
            legal_idx = [i for i in range(len(policy)) if legal[i] == 1]
            policy[legal_idx] += noise[:len(legal_idx)]
            policy /= np.sum(policy)

            # sample the move and apply it everywhere
            move = np.random.choice(len(policy), p=policy)

            if show:
                self.gomoku_gui.execute_move(mover, move)
            game.execute_move(move)
            for search in (search_one, search_two):
                search.update_with_move(move)

            # hand the turn to the other search
            turn = -turn

            # stop as soon as the game is over
            finished, winner = game.get_game_status()
            if finished == 1:
                # b, last_action, cur_player, p, v
                return [(b, a, color, p, color * winner)
                        for b, a, color, p in samples]
import time


if __name__ == "__main__":
    # Manual benchmark: set up a fixed position, then time the search on
    # it forever (stop with Ctrl-C).
    gomoku = Gomoku(15, 5, 1)

    # alternate moves 40, 99, 41, 98, 42, 97, 43, 96
    for first_move, second_move in zip(range(40, 44), range(99, 95, -1)):
        gomoku.execute_move(first_move)
        gomoku.execute_move(second_move)

    gomoku.display()

    mcts = MCTS("./models/checkpoint.pt", 4, 2.5, 1600, 2.5, 225, True)

    print("RUNNING")
    while True:
        started = time.time()
        res = mcts.get_action_probs(gomoku, 1)
        elapsed = time.time() - started
        print('get_action_probs', elapsed)

        probs = list(res)
        print(probs)
        best_action = int(np.argmax(np.array(probs)))
        print(best_action, res[best_action])

        # NOTE(review): -1 presumably resets the search tree so the same
        # position is searched again -- confirm against MCTS
        mcts.update_with_move(-1)
    def learn(self):
        """
        Train the network through repeated self-play iterations.

        The GUI loop runs in a background thread for the whole run. If
        './models/checkpoint' exists, the model and the stored samples are
        loaded first. Each iteration plays ``num_eps`` self-play games,
        alternating the first colour (starting with 1), and adds their
        examples to ``examples_buffer``. Once the buffer holds at least
        ``batch_size`` examples, a batch drawn with ``sample`` is used to
        train the network and the "checkpoint" model is saved.

        Every ``check_freq`` iterations the samples are saved and the
        current checkpoint plays ``contest_num`` games against the best
        checkpoint. The new model replaces the best one when its share of
        the decisive games exceeds ``update_threshold``.
        """
        gui_thread = threading.Thread(target=self.gomoku_gui.loop)
        gui_thread.start()

        # resume from an earlier run when a checkpoint is present
        if os.path.exists('./models/checkpoint'):
            print("loading checkpoint...")
            self.nnet.load_model('models', "checkpoint")
            self.load_samples("models", "checkpoint")

        # generate .pt for libtorch
        for model_name in ("checkpoint", "best_checkpoint"):
            self.nnet.save_model('models', model_name)

        def build_search(model_path):
            return MCTS(model_path, self.thread_pool_size, self.c_puct,
                        self.num_mcts_sims, self.c_virtual_loss,
                        self.action_size, self.mcts_use_gpu)

        for iteration in range(1, self.num_iters + 1):
            print("ITER ::: " + str(iteration))

            # self play, alternating the colour that moves first
            color = 1
            for episode in range(1, self.num_eps + 1):
                episode_examples = self.self_play(color)
                self.examples_buffer.extend(episode_examples)

                color = -color
                print("EPS :: " + str(episode) + ", EXAMPLES :: " +
                      str(len(episode_examples)))

            # not enough data for a batch yet: keep collecting
            if len(self.examples_buffer) < self.batch_size:
                continue

            print("sampling...")
            batch = sample(self.examples_buffer, self.batch_size)

            # train neural network
            self.nnet.train(batch)
            self.nnet.save_model('models', "checkpoint")

            # the evaluation below only runs every `check_freq` iterations
            if iteration % self.check_freq != 0:
                continue

            self.save_samples("models", "checkpoint")

            # compare performance
            candidate = build_search("./models/checkpoint.pt")
            incumbent = build_search("./models/best_checkpoint.pt")

            one_won, two_won, draws = self.contest(candidate, incumbent,
                                                   self.contest_num)
            print("NEW/PREV WINS : %d / %d ; DRAWS : %d" %
                  (one_won, two_won, draws))

            # draws are ignored when computing the win ratio
            decisive = one_won + two_won
            if decisive > 0 and float(
                    one_won) / decisive > self.update_threshold:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_model('models', "best_checkpoint")
            else:
                print('REJECTING NEW MODEL')

            del candidate
            del incumbent

        gui_thread.join()
Beispiel #7
0
                res = self.contest(player1, player2, show)
                if res == 1:
                    win1_cnt += 1
                elif res == -1:
                    win2_cnt += 1
                else:
                    draw_cnt += 1
            else:
                res = self.contest(player2, player1, show)
                if res == 1:
                    win2_cnt += 1
                elif res == -1:
                    win1_cnt += 1
                else:
                    draw_cnt += 1
        print("\nTournament finished.")
        print("Player1 vs. player2: {:d}/{:d}/{:d} (win1/win2/draw)".format(
            win1_cnt, win2_cnt, draw_cnt))


# Demo script: builds the players and runs a single contest between two
# pure-MCTS players.
# Both networks are loaded from the same model file.
# NOTE(review): the `False, 4` arguments are presumably a use-GPU flag and a
# size/thread setting -- confirm against NeuralNetwork's signature.
network1 = NeuralNetwork("../models/model_8x8_5_2000iters_cpu.pt", False, 4)
network2 = NeuralNetwork("../models/model_8x8_5_2000iters_cpu.pt", False, 4)

game = Game()
# NOTE(review): `human`, `alphazero1` and `alphazero2` are constructed but not
# used by any statement visible in this script.
human = Human()
mcts1 = MCTS(4, 100000, 5, 3)
mcts2 = MCTS(4, 100000, 5, 3)
alphazero1 = AlphaZero(network1, 4, 800, 5, 3)
alphazero2 = AlphaZero(network2, 4, 800, 5, 3)
# Only the two MCTS players take part in the contest that is run.
game.contest(mcts1, mcts2)
# Disabled alternative: run a tournament instead of a single contest.
# game.tournament(mcts1, mcts2, round = 10, show = True)