コード例 #1
0
    def move(self, position, board, is_my_move=True):
        """
        Simulate a single move on a copy of ``board`` and report the outcome.

        The caller's ``board`` is deep-copied before anything else, so the
        argument itself is not handed to ``Kalah``. When ``is_my_move`` is
        false, the copy is passed through ``reverse_board`` before the move
        is played and the resulting board is passed through it again
        afterwards, so the opponent's move can be simulated with the same
        ``Kalah.move`` call.

        Returns a 3-tuple ``(board, game_over, free_turn)``: the board after
        the move, the value of ``is_game_over()`` on the simulated game, and
        the second element returned by ``Kalah.move``.
        """
        opponent_turn = not is_my_move

        working_copy = copy.deepcopy(board)
        simulation = Kalah(working_copy)
        if opponent_turn:
            simulation.board = reverse_board(working_copy)

        # Only the second element of the move result is reported back.
        _score, extra_turn = simulation.move(position)

        result_board = simulation.board
        if opponent_turn:
            result_board = reverse_board(result_board)

        return result_board, simulation.is_game_over(), extra_turn
コード例 #2
0
def main():
    """Train the left DQN by playing Kalah episodes and optimising after every move.

    For each of ``num_episodes`` episodes a fresh ``Kalah`` game is created
    from the standard 4-seeds-per-house board. Every turn an action is drawn
    with ``select_action`` until it points at a non-empty hole, the move is
    played, the transition is pushed to ``memory`` and ``optimize_model`` is
    called once. The returned losses are summed and their per-turn average is
    printed at the end of the episode.

    Side effects visible in this function:
      * ``runner.user`` / ``runner.opponent`` are (re)assigned and their roots
        are kept in sync with the game via ``initial_root`` / ``update_root``.
      * ``left_target_net`` is synchronised from ``left_policy_net`` whenever
        ``i_episode % TARGET_UPDATE == 1``.
      * Every 50th episode a checkpoint is written to ``checkpoint_left.pth``
        and the runner's win/loss/draw counters are reset; the final weights
        are written to ``dqn_cnn_left.pth``.
    """
    num_episodes = 10000

    if runner.am_i_minmax:
        runner.user = Minimax()
    else:
        runner.user = User(simulation_depth=6, number_of_simulation=200)

    if runner.is_user_defined_opponent:
        # opponent_path is a dotted "package.module.ClassName" string.
        module, name = runner.opponent_path.rsplit('.', 1)
        runner.opponent = getattr(importlib.import_module(module),
                                  name)(number_of_simulation=1000)
    else:
        runner.opponent = Minimax()

    for i_episode in range(num_episodes):
        # Initialize the environment and state
        print("New games for training!")
        initial_board = [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0]
        new_game = Kalah(initial_board)
        if i_episode % 2 == 0:
            # Even-numbered episodes start with the player flag cleared.
            new_game.player = False

        if not runner.am_i_minmax:
            runner.user.initial_root(initial_board)

        if runner.is_user_defined_opponent:
            runner.opponent.initial_root(initial_board)

        turns_played = 0
        loss_sum = 0
        for turn in count():
            # Select and perform an action.
            current_board = copy.deepcopy(new_game.get_board())
            state = game_state_to_tensor(current_board)

            if new_game.player:  # my turn
                cur_policy = left_policy_net
                cur_target = left_target_net
                opt = optimizer_left
            else:  # opponent's turn
                # The right-side networks are disabled (see the commented-out
                # right_* lines below), so this side also uses the left ones.
                cur_policy = left_policy_net
                cur_target = left_target_net
                opt = optimizer_left

            # Alternative for the opponent's turn, currently disabled:
            #action = torch.tensor([[runner.opponent.search(current_board)]], device=device)
            # Re-draw until the chosen hole is non-empty (or the game is over).
            while True:
                action = select_action(current_board, cur_target)
                next_position = action.item()
                if (new_game.get_board()[next_position] != 0
                        or new_game.is_game_over()):
                    break

            _, free_turn = new_game.move(next_position)

            # Observe the new state.
            next_board = copy.deepcopy(new_game.get_board())
            next_state = game_state_to_tensor(next_board)
            reward = evaluation(new_game.is_game_over(), next_board)
            # Store the transition in replay memory.
            reward = torch.tensor([reward], device=device, dtype=torch.float)
            memory.push(state, action, next_state, reward)
            # Compute the loss for this step.
            # NOTE(review): assumes optimize_model always returns a number —
            # confirm it never returns None (e.g. while memory is small).
            loss = optimize_model(free_turn, cur_policy, cur_target, opt)
            loss_sum += loss

            if not runner.am_i_minmax:
                runner.user.update_root(next_position,
                                        copy.deepcopy(new_game.get_board()),
                                        copy.deepcopy(new_game.player))

            if runner.is_user_defined_opponent:
                runner.opponent.update_root(
                    next_position, copy.deepcopy(new_game.get_board()),
                    copy.deepcopy(not new_game.player))
            if new_game.is_game_over():
                # count() is zero-based, so turn + 1 losses were accumulated.
                # Dividing by `turn` skewed the average and raised
                # ZeroDivisionError for a game that ended on its first turn.
                turns_played = turn + 1
                break

        runner.score_board(i_episode, new_game.result())
        print(i_episode, 'game Average loss: ', loss_sum / turns_played)
        print()
        # Update the target network.
        if i_episode % TARGET_UPDATE == 1:
            left_target_net.load_state_dict(left_policy_net.state_dict())
            #right_target_net.load_state_dict(right_policy_net.state_dict())

        if i_episode % 50 == 49:  # save once every 50 episodes
            torch.save(left_target_net.state_dict(), 'checkpoint_left.pth')
            runner.wins = 0
            runner.losses = 0
            runner.draws = 0

            #torch.save(right_target_net.state_dict(), 'checkpoint_right.pth')

    torch.save(left_target_net.state_dict(), 'dqn_cnn_left.pth')
    #torch.save(right_target_net.state_dict(), 'dqn_cnn_right.pth')
    print('Complete')
コード例 #3
0
    def run_game(self, tree_visualization=True):
        """Play ``self.num_of_games`` games between the user agent and the opponent.

        Both agents are rebuilt at the start of every game and deleted from
        the instance at the end of it. The user agent is ``Minimax`` when
        ``self.am_i_minmax is True``; otherwise it is the class named by the
        dotted path ``self.user_path``. The opponent is loaded from
        ``self.opponent_path`` when ``self.is_user_defined_opponent`` is set,
        else it is ``Minimax``.

        While ``new_game.player`` is truthy the user agent picks the hole and
        its search is timed and checked with ``self.is_time_out``; otherwise
        the opponent picks. After each move the agents' roots are updated and,
        when ``tree_visualization`` is true, ``self.user.g`` is rendered and
        shown. Each finished game is reported through ``self.score_board``.
        """
        for game_index in range(self.num_of_games):
            # --- build the two agents for this game ---
            if self.am_i_minmax is True:
                self.user = Minimax()
            else:
                user_module, user_cls = self.user_path.rsplit('.', 1)
                user_factory = getattr(importlib.import_module(user_module),
                                       user_cls)
                self.user = user_factory(number_of_simulation=500,
                                         simulation_depth=6)

            if self.is_user_defined_opponent:
                opp_module, opp_cls = self.opponent_path.rsplit('.', 1)
                opp_factory = getattr(importlib.import_module(opp_module),
                                      opp_cls)
                self.opponent = opp_factory(number_of_simulation=1000)
            else:
                self.opponent = Minimax()

            print("New game!")
            print("Initial board >>")

            # --- set up the board ---
            start_board = [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0]
            new_game = Kalah(start_board)
            if game_index % 2 != 0:
                # Odd-numbered games start with the player flag cleared.
                new_game.player = False
            new_game.show_board()

            if not self.am_i_minmax:
                self.user.initial_root(start_board)
            if self.is_user_defined_opponent:
                self.opponent.initial_root(start_board)

            turns_taken = 0
            while not new_game.is_game_over():
                turns_taken += 1

                # --- choose a hole ---
                if not new_game.player:
                    board_for_search = copy.deepcopy(new_game.get_board())
                    next_position = self.opponent.search(board_for_search)
                else:
                    # Only the user agent's search is timed; the board copy
                    # is taken inside the measured window.
                    started = time.time()
                    next_position = self.user.search(
                        copy.deepcopy(new_game.get_board()))
                    finished = time.time()
                    print('measured time: ', finished - started)
                    self.is_time_out(started, finished)

                # --- apply it ---
                _score, _free_turn = new_game.move(next_position)

                # --- report ---
                if not self.am_i_minmax:
                    rate = self.user.print_winning_rate(next_position)
                    print("winning rate:", rate)
                if tree_visualization:
                    rendered = self.user.g.render(view=False)
                    show_image(rendered, auto_close=False)

                # --- keep the agents' roots in step with the game ---
                if not self.am_i_minmax:
                    board_after = copy.deepcopy(new_game.get_board())
                    side_to_move = copy.deepcopy(new_game.player)
                    self.user.update_root(next_position, board_after,
                                          side_to_move)
                if self.is_user_defined_opponent:
                    board_after = copy.deepcopy(new_game.get_board())
                    side_to_move = copy.deepcopy(not new_game.player)
                    self.opponent.update_root(next_position, board_after,
                                              side_to_move)

            # --- game finished: show the final board and record the result ---
            new_game.show_board()
            turns_taken = 0
            self.score_board(game_index, new_game.result())
            del self.user
            del self.opponent
コード例 #4
0
class KalahUi:
    """Console front end for a ``Kalah`` game.

    Renders the board as text (player 2's row on top, the two stores in the
    middle, player 1's row at the bottom), reads moves from standard input
    and forwards them to the game until it ends, the user quits with ``q``,
    standard input is closed, or ``interrupt()`` is called.
    """

    def __init__(self):
        # NOTE(review): presumably (houses, seeds per house) — confirm
        # against the Kalah constructor.
        self.game = Kalah(5, 5)
        self.maxSeeds = 50
        # Width every seed count and house number is right-justified to.
        self.maxDigits = 2

        self.padding = ' ' * 12
        self.player1string = '  Player 1  '
        self.player2string = '  Player 2  '

        # Cleared by interrupt() to make run() leave its main loop.
        self.canContinue = True

    def interrupt(self):
        """Ask ``run()`` to stop before prompting for the next move."""
        self.canContinue = False

    def formatFirstPlayer(self, player):
        """Return the two text lines (seed row and house-number row) for ``player``.

        For an even ``player`` index the result is ``[seeds, houses]`` laid
        out left to right. For an odd index both the cell order inside each
        line and the order of the two lines are reversed, so that player's
        row reads mirrored with its house numbers above the seeds.
        """
        step = -1 if player % 2 == 1 else 1
        padding = ' ' * 12
        playerstring = f'  Player {player + 1}  '

        playerSeeds = [padding]
        playerSeeds += [
            f'[{str(seeds).rjust(self.maxDigits)}]  '
            for seeds in self.game.getPlayerSlice(player, includeStore=False)
        ]
        playerSeeds += [playerstring]

        playerHouses = [padding]
        playerHouses += [
            f' {str(house).rjust(self.maxDigits)}   '
            for house in range(1, self.game._houses + 1)
        ]
        playerHouses += [padding]

        return [
            ''.join(playerSeeds[::step]),
            ''.join(playerHouses[::step])
        ][::step]

    def formatStores(self):
        """Return the middle line: store of player index 1 on the left, index 0 on the right."""
        lead = ' ' * 4
        # 12 is the side-column width; 6 covers the lead, '[' and ']'.
        trail = ' ' * (12 - 6 - self.maxDigits)
        store1 = self.game.board[self.game.storeIndex(0)]
        store2 = self.game.board[self.game.storeIndex(1)]
        store1str = f'[{str(store1).rjust(self.maxDigits)}]'
        store2str = f'[{str(store2).rjust(self.maxDigits)}]'
        # One blank cell per house, matching the cell width used in the rows.
        spaces = self.game._houses * (4 + self.maxDigits) * ' '
        return lead + store2str + trail + spaces + trail + store1str + lead

    def formatState(self):
        """Return the prompt naming the next player and the valid move range."""
        return f'\n> Next player: {self.game.nextPlayer + 1}. Select your move (1-{self.game._houses})\n'

    def formatField(self):
        """Return the whole board as one multi-line string."""
        return '\n'.join(
            self.formatFirstPlayer(1) + [self.formatStores()] +
            self.formatFirstPlayer(0))

    def run(self):
        """Run the interactive loop until the game ends or the user leaves.

        Each round prints the board and prompt, then reads lines until one
        parses as an integer. ``q`` or a closed standard input returns
        immediately without printing a result. ``KeyboardInterrupt`` and
        ``SystemExit`` are deliberately not caught, so Ctrl+C still
        terminates the program.
        """
        while not self.game.gameEnded and self.canContinue:
            print(self.formatField())
            print(self.formatState())

            move = None
            while move is None:
                try:
                    inp = input()
                except EOFError:
                    # stdin is closed: no move can ever arrive, so leave the
                    # same way as 'q' instead of re-prompting forever.
                    return
                if inp == 'q':
                    return

                try:
                    # The input is 1-based; the modulo wraps out-of-range
                    # numbers onto an existing house index.
                    move = (int(inp) - 1) % self.game._houses
                except ValueError:
                    print("> Not a valid move. Try again\n")
                else:
                    print(f"> Moving house {move + 1}")

            self.game.move(move)

        print(self.formatField())
        print(self.formatState())

        # NOTE(review): this is also reached after interrupt(); if
        # winningPlayer() returns None for an unfinished game, a tie is
        # announced — confirm that is intended.
        winner = self.game.winningPlayer()
        if winner is not None:
            print(
                f"> Game has ended. Player {winner + 1} won!"
            )
        else:
            print("> Game has ended. Game ended in a tie!")