Example #1
    # Assumes module-level imports: copy, importlib, time, plus the Kalah,
    # Minimax, and show_image helpers from the surrounding project.
    def run_game(self, tree_visualization=True):
        for i in range(self.num_of_games):
            if self.am_i_minmax:
                self.user = Minimax()
            else:
                # Dynamically load the user agent class from its dotted path.
                module, name = self.user_path.rsplit('.', 1)
                self.user = getattr(importlib.import_module(module),
                                    name)(number_of_simulation=500,
                                          simulation_depth=6)

            if self.is_user_defined_opponent:
                module, name = self.opponent_path.rsplit('.', 1)
                self.opponent = getattr(importlib.import_module(module),
                                        name)(number_of_simulation=1000)
            else:
                self.opponent = Minimax()

            print("New game!")
            print("Initial board >>")
            # Initialization: indices 0-5 are the first player's holes and
            # index 6 their store; 7-12 and 13 mirror this for the opponent.
            initial_board = [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0]
            new_game = Kalah(initial_board)
            if i % 2 == 1:  # alternate which side moves first each game
                new_game.player = False
            new_game.show_board()
            if not self.am_i_minmax:
                self.user.initial_root(initial_board)
            if self.is_user_defined_opponent:
                self.opponent.initial_root(initial_board)
            turn = 0

            while not new_game.is_game_over():
                turn += 1
                # pick a hole:
                if new_game.player:
                    start_time = time.time()
                    next_position = self.user.search(
                        copy.deepcopy(new_game.get_board()))
                    end_time = time.time()
                    print('measured time: ', end_time - start_time)
                    self.is_time_out(start_time, end_time)
                else:
                    next_position = self.opponent.search(
                        copy.deepcopy(new_game.get_board()))
                # Apply the move; the returned score is unused here.
                _, free_turn = new_game.move(next_position)
                # print:
                if not self.am_i_minmax:
                    print("winning rate:",
                          self.user.print_winning_rate(next_position))
                if tree_visualization and not self.am_i_minmax:
                    # Minimax keeps no search tree, so only render the
                    # user agent's graph when one exists.
                    show_image(self.user.g.render(view=False),
                               auto_close=False)
                if not self.am_i_minmax:
                    self.user.update_root(next_position,
                                          copy.deepcopy(new_game.get_board()),
                                          copy.deepcopy(new_game.player))
                if self.is_user_defined_opponent:
                    self.opponent.update_root(
                        next_position, copy.deepcopy(new_game.get_board()),
                        copy.deepcopy(not new_game.player))

            # End of the game: print the result and record the score.
            new_game.show_board()
            self.score_board(i, new_game.result())
            del self.user
            del self.opponent
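
Both examples call `score_board` at the end of each game, but its body is not shown. Below is a minimal, hypothetical sketch of what it might look like, assuming `Kalah.result()` returns a positive value for a user win and that the runner keeps the `wins`/`losses`/`draws` counters that Example #2 resets:

    def score_board(self, game_index, result):
        # Hypothetical sketch; the real implementation is not shown here.
        # Assumes result > 0 means the user won, < 0 the opponent, 0 a draw.
        if result > 0:
            self.wins += 1
        elif result < 0:
            self.losses += 1
        else:
            self.draws += 1
        print('game', game_index, '>> wins:', self.wins,
              'losses:', self.losses, 'draws:', self.draws)
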
Example #2
# Imports used by this snippet; runner, device, memory, the networks, the
# optimizers, TARGET_UPDATE, and the helper functions are module-level
# globals defined elsewhere in the source.
import copy
import importlib
from itertools import count

import torch


def main():
    num_episodes = 10000
    # player = User(simulation_depth=6, number_of_simulation=1000)

    if runner.am_i_minmax:
        runner.user = Minimax()
    else:
        runner.user = User(simulation_depth=6, number_of_simulation=200)

    if runner.is_user_defined_opponent:
        module, name = runner.opponent_path.rsplit('.', 1)
        runner.opponent = getattr(importlib.import_module(module),
                                  name)(number_of_simulation=1000)
    else:
        runner.opponent = Minimax()

    for i_episode in range(num_episodes):
        # Initialize the environment and state
        print("New games for training!")
        initial_board = [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0]
        new_game = Kalah(initial_board)
        if i_episode % 2 == 0:  # alternate which side moves first
            new_game.player = False

        if not runner.am_i_minmax:
            runner.user.initial_root(initial_board)

        if runner.is_user_defined_opponent:
            runner.opponent.initial_root(initial_board)
        num = 0
        loss_sum = 0
        for turn in count():
            # Select and perform an action.

            current_board = copy.deepcopy(new_game.get_board())
            state = game_state_to_tensor(current_board)

            if new_game.player:  # our turn
                cur_policy = left_policy_net  # left-side model
                cur_target = left_target_net
                opt = optimizer_left
                while True:
                    action = select_action(current_board, cur_target)
                    next_position = action.item()
                    # Re-sample until a legal (non-empty) hole is chosen.
                    if (new_game.get_board()[next_position] != 0
                            or new_game.is_game_over()):
                        break

            else:  # opponent's turn
                # The right-side model is commented out below, so the left
                # model plays both sides during training.
                cur_policy = left_policy_net
                cur_target = left_target_net
                opt = optimizer_left
                while True:
                    # action = torch.tensor([[runner.opponent.search(current_board)]], device=device)
                    action = select_action(current_board, cur_target)
                    next_position = action.item()
                    if (new_game.get_board()[next_position] != 0
                            or new_game.is_game_over()):
                        break

            _, free_turn = new_game.move(next_position)

            # Observe the new state.
            next_board = copy.deepcopy(new_game.get_board())
            next_state = game_state_to_tensor(next_board)
            reward = evaluation(new_game.is_game_over(), next_board)
            # Store the transition in replay memory.
            reward = torch.tensor([reward], device=device, dtype=torch.float)
            memory.push(state, action, next_state, reward)
            # Compute the loss and take one optimization step.
            loss = optimize_model(free_turn, cur_policy, cur_target, opt)
            loss_sum += loss

            if not runner.am_i_minmax:
                runner.user.update_root(next_position,
                                        copy.deepcopy(new_game.get_board()),
                                        copy.deepcopy(new_game.player))

            if runner.is_user_defined_opponent:
                runner.opponent.update_root(
                    next_position, copy.deepcopy(new_game.get_board()),
                    copy.deepcopy(not new_game.player))
            if new_game.is_game_over():
                num = turn
                break

        runner.score_board(i_episode, new_game.result())
        # num is the zero-based index of the final turn, so num + 1 turns ran.
        print(i_episode, 'game average loss:', loss_sum / (num + 1))
        print()
        # Update the target network periodically.
        if i_episode % TARGET_UPDATE == 1:
            left_target_net.load_state_dict(left_policy_net.state_dict())
            #right_target_net.load_state_dict(right_policy_net.state_dict())

        if i_episode % 50 == 49:  # save a checkpoint every 50 episodes
            torch.save(left_target_net.state_dict(), 'checkpoint_left.pth')
            # Reset the running score so each block of 50 games starts fresh.
            runner.wins = 0
            runner.losses = 0
            runner.draws = 0

            #torch.save(right_target_net.state_dict(), 'checkpoint_right.pth')

    torch.save(left_target_net.state_dict(), 'dqn_cnn_left.pth')
    #torch.save(right_target_net.state_dict(), 'dqn_cnn_right.pth')
    print('Complete')
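
Example #2 also relies on a `select_action` helper that is not shown. Below is a minimal epsilon-greedy sketch under common DQN assumptions: the decay constants, the `steps_done` counter, and the `N_ACTIONS` size are all hypothetical, while `device` and `game_state_to_tensor` come from the surrounding module. Note that the training loop above selects actions with the target network (`cur_target`); classic DQN would select with the policy network instead.

import math
import random

import torch

EPS_START, EPS_END, EPS_DECAY = 0.9, 0.05, 2000  # hypothetical schedule
N_ACTIONS = 14  # assumption: one Q-value per board index
steps_done = 0

def select_action(board, net):
    # Epsilon-greedy: explore with decaying probability, else exploit.
    global steps_done
    eps = EPS_END + (EPS_START - EPS_END) * math.exp(-steps_done / EPS_DECAY)
    steps_done += 1
    if random.random() < eps:
        # Explore: pick a random hole; the caller re-samples illegal moves.
        return torch.tensor([[random.randrange(N_ACTIONS)]], device=device)
    with torch.no_grad():
        # Exploit: pick the hole with the highest predicted Q-value.
        return net(game_state_to_tensor(board)).max(1)[1].view(1, 1)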