Beispiel #1
0
    def test_score_round(self):
        """Score one round on two hand-built boards and verify the result.

        Both players get a prepared wall, pattern queues and floor count;
        after ``score_round`` the test checks the scores, which queues were
        cleared and that both floor counts were reset to zero.
        """
        game = Azul()
        state = game.get_init_state()
        first, second = state.players[0], state.players[1]

        # Player 0: a few wall tiles already placed (row, column, color).
        for row, col, color in (
                (0, 3, Color.Black),
                (1, 0, Color.White),
                (1, 1, Color.Blue),
                (1, 2, Color.Yellow),
        ):
            first.set_wall(row, col, color)

        # Player 0: pattern queues as (queue index, color, tile count).
        for queueIndex, color, count in (
                (0, Color.White, 1),  # Scores 2.
                (1, Color.Red, 2),  # Scores 6.
                (2, Color.Black, 3),  # Scores 2.
                (3, Color.Red, 3),  # Scores 0.
                (4, Color.Blue, 5),  # Scores 1.
        ):
            first.set_queue(queueIndex, color, count)

        first.floorCount = 3

        # Player 1: one wall row and one wall column, each with a single gap.
        second.set_wall_row(
            3, [Color.Red, Color.Black, Color.Empty, Color.Blue, Color.Yellow])
        second.set_wall_col(
            2, [Color.Red, Color.Yellow, Color.Blue, Color.Empty, Color.Black])
        second.set_queue(0, Color.Yellow, 1)  # Scores 2.
        second.set_queue(3, Color.White, 4)  # Scores 10
        second.floorCount = 1

        scored = game.score_round(state)
        first, second = scored.players[0], scored.players[1]

        self.assertEqual(first.score, 7)
        self.assertEqual(second.score, 11)

        # Player 0: queues 0-2 and 4 are emptied, queue 3 keeps its red tiles.
        self.assertEqual(first.queue[0:3], [[0, 0], [0, 0], [0, 0]])
        self.assertEqual(first.queue[3], [int(Color.Red), 3])
        self.assertEqual(first.queue[4], [0, 0])

        # Player 1: every queue entry is zero after scoring.
        allZeroQueues = np.zeros_like(np.array(second.queue)).tolist()
        self.assertEqual(second.queue, allZeroQueues)

        self.assertEqual(first.floorCount, 0)
        self.assertEqual(second.floorCount, 0)
Beispiel #2
0
class AzulCmd(cmd.Cmd):
    """Interactive shell for playing Azul with an MCTS bot.

    Commands:
        move <bin> <color> <queue>   apply a move given as three tokens
        bot_move                     let the MCTS bot pick and apply a move
        undo                         restore the state before the last move

    The current state is printed before the first prompt and after every
    command.
    """

    prompt = '> '

    def __init__(self):
        # Init with defaults.
        super().__init__()

        # States preceding each applied move, newest last (used by `undo`).
        self.history = []  # type: List[AzulState]
        self.azul = Azul()
        self.state = self.azul.get_init_state()
        self.state = self.azul.deal_round(self.state)

        # Index of the player reported as "Bot" in the end-of-game summary.
        self.botPlayerIndex = 0
        # self.budget = 100000
        self.budget = 1000
        self.samplingWidth = 10
        self.explorationWeight = 20

    def preloop(self) -> None:
        """Print the initial state before the first prompt."""
        super().preloop()

        Azul.print_state(self.state)

    def postcmd(self, stop: bool, line: str) -> bool:
        """Print the current state after every command."""
        Azul.print_state(self.state)

        return super().postcmd(stop, line)

    def do_move(self, arg: str):
        """move <bin> <color> <queue>: apply the given move."""
        # split() without arguments tolerates repeated or surrounding blanks.
        bits = arg.split()

        # Missing tokens used to raise an uncaught IndexError and abort the
        # shell; report them like any other malformed command instead.
        # Extra trailing tokens are ignored, as before.
        if len(bits) < 3:
            print("# Invalid command")
            return

        # Parse the command.
        try:
            move = Move(int(bits[0]), Azul.str_to_color(bits[1]), int(bits[2]))
        except ValueError:
            print("# Invalid command")
            return

        self._apply_move(move)

    def do_bot_move(self, arg: str):
        """bot_move: let the MCTS bot choose a move and apply it."""
        # Do not spend the search budget on a finished game.
        if self.azul.is_game_end(self.state):
            print("The game is over, can't do moves.")
            return

        bot = MctsBot(self.azul,
                      self.state,
                      samplingWidth=self.samplingWidth,
                      explorationWeight=self.explorationWeight)
        move = bot.step_n(self.budget)

        print("Bot's move: ")
        print(
            f"Take {Azul.color_to_str(move.color)} from bin {move.sourceBin} to queue {move.targetQueue}"
        )

        self._apply_move(move)

    def _apply_move(self, move):
        """Apply `move`, then score the round and the game when they end.

        The previous state is pushed onto the history first; if the game
        logic raises ValueError, the error is printed and that state is
        restored.
        """
        if self.azul.is_game_end(self.state):
            print("The game is over, can't do moves.")
            return

        self.history.append(self.state)

        # Apply the move.
        try:
            outcome = self.azul.apply_move_without_scoring(self.state, move)
            self.state = outcome.state

            if self.azul.is_round_end(self.state):
                self.state = self.azul.score_round(self.state)

                if self.azul.is_game_end(self.state):
                    self.state = self.azul.score_game(self.state)
                    self._print_game_result()
                else:
                    self.state = self.azul.deal_round(self.state)

        except ValueError as e:
            print(f"# {e}")
            print("Undoing the move.")

            self.state = self.history.pop()

    def _print_game_result(self):
        """Print the winner and the final scores of the finished game."""
        humanIndex = 1 - self.botPlayerIndex
        botScore = self.state.players[self.botPlayerIndex].score
        humanScore = self.state.players[humanIndex].score

        print("=== Game Over! ===")
        # Compare the two scores directly: the previous index arithmetic
        # announced the player with the LOWER score as the winner.
        if botScore == humanScore:
            # Decided on scores alone; no tie-breaker is applied here.
            print("The game is a draw")
        else:
            print(f"{'Bot' if botScore > humanScore else 'Human'} player wins")
        print(f"Scores: Bot = {botScore}    "
              f"Human = {humanScore}")

    def do_undo(self, arg: str):
        """undo: restore the state from before the last applied move."""
        if not self.history:
            print("# This is the first turn.")
            return

        print("# UNDO #")
        self.state = self.history.pop()
Beispiel #3
0
def main():
    """Benchmark the MCTS bot against the greedy bot over a parameter grid.

    For every configuration produced by the grid search, plays
    ``gamesToPlay`` games (greedy bot as player 0, MCTS bot as player 1),
    prints timing and score statistics and collects one result row per
    player and finished game. Finally writes the rows to a tab-separated
    file and a score box plot to a PDF in the output directory, then shows
    the plot.

    The output directory is ``$DEV_OUT_PATH/azul_bot`` when ``DEV_OUT_PATH``
    is set and the current working directory otherwise.
    """
    gamesToPlay = 30
    maxRoundsPerGame = 100
    samplingWidth = 10
    botClass = MctsBotCpp
    # botClass = MctsBotPy

    searchManager = GridSearchManager(defaultConfig={})
    # searchManager.add_param_axis('mctsBudget', [100, 1000, 10000, 100000, 300000])
    searchManager.add_param_axis('mctsBudget', [100, 1000, 10000])
    # searchManager.add_param_axis('explorationWeight', [20, 30, 50])
    searchManager.add_param_axis('explorationWeight', [20])

    if 'DEV_OUT_PATH' in os.environ:
        outDirPath = Path(os.environ['DEV_OUT_PATH']) / 'azul_bot'
    else:
        outDirPath = Path.cwd()
    outDirPath.mkdir(parents=True, exist_ok=True)

    resultRows = []

    for config, _, _ in searchManager.generate_configuration():
        azul = Azul()
        # players = [build_random_bot(), build_mcts_bot(mctsBudget)]
        players = [
            build_greedy_bot(),
            MctsBotWrapper(config['mctsBudget'],
                           samplingWidth,
                           config['explorationWeight'],
                           botClass=botClass)
        ]
        scores = []
        timePerMove = []

        timer = StageTimer()
        for _ in range(gamesToPlay):
            timer.start_pass()

            state = azul.get_init_state()

            roundCount = 0
            moveCount = 0
            for _ in range(maxRoundsPerGame):
                timer.start_stage('deal')
                state = azul.deal_round(state)

                while not azul.is_round_end(state):
                    timer.start_stage('decide')
                    move = players[state.nextPlayer](state)
                    timer.start_stage('move')
                    state = azul.apply_move_without_scoring(state, move).state
                    # Counts only the moves after which player 1 is next.
                    # NOTE(review): the 'decide' stage is timed for both
                    # players -- confirm this is the intended denominator.
                    moveCount += 1 if state.nextPlayer == 1 else 0

                timer.start_stage('score')
                state = azul.score_round(state)
                roundCount += 1

                if azul.is_game_end(state):
                    state = azul.score_game(state)
                    break

            timer.end_stage()

            if azul.is_game_end(state):
                timer.end_pass()
                dur = timer.get_pass_duration()
                scores.append([state.players[0].score, state.players[1].score])
                timePerMove.append(timer.get_pass_timings()['decide'] /
                                   moveCount)
                print(
                    f"Finished a game with scores {state.players[0].score}:{state.players[1].score}"
                    f" in {roundCount} rounds and {dur:.2f} s.")

            else:
                print("Timed out playing a game.")

        timer.end()

        if not scores:
            # Every game timed out: an empty score array cannot be indexed
            # by column, so skip the statistics for this configuration.
            print(timer.get_total_report())
            print("No games finished for this configuration.")
            continue

        scoresArray = np.array(scores)
        scoresAvg = scoresArray.mean(axis=0)
        winsFirst = np.count_nonzero(scoresArray[:, 0] > scoresArray[:, 1])
        winsSecond = np.count_nonzero(scoresArray[:, 0] < scoresArray[:, 1])
        # Could be a draw.

        for scoreFirst, scoreSecond in scoresArray:
            resultRows.append({
                'player': 'greedy',
                'budget': 0,
                'explorationWeight': 0,
                'score': scoreFirst,
                'isWin': scoreFirst > scoreSecond
            })

            resultRows.append({
                'player': 'mcts',
                'budget': config['mctsBudget'],
                'explorationWeight': config['explorationWeight'],
                'score': scoreSecond,
                'isWin': scoreFirst < scoreSecond
            })

        print(timer.get_total_report())
        print("Average scores: {:.1f} {:.1f}".format(*tuple(scoresAvg)))
        print("Average time per move: {:.1f}".format(
            np.mean(np.array(timePerMove))))
        print("Wins: {} vs {}".format(winsFirst, winsSecond))

    if not resultRows:
        # An empty table has no 'budget'/'score' columns to plot.
        print("No finished games to report.")
        return

    print("Plotting.")
    resultTable = pd.DataFrame(resultRows)
    print(resultTable)

    # resultTable = resultTable[resultTable['player'] == 'mcts']
    ax = sns.boxplot(x='budget',
                     y='score',
                     hue='explorationWeight',
                     data=resultTable)
    date = datetime.now()
    dateStr = date.strftime("%y%m%d-%H%M%S")
    ax.get_figure().savefig(outDirPath / f'{dateStr}_scores.pdf')

    # Persist the metrics before the plot window blocks, so they are not
    # lost if the process is interrupted while the window is open.
    resultTable.to_csv(outDirPath / f'{dateStr}_metrics.csv', sep='\t')

    plt.show()

    print("Done.")
Beispiel #4
0
    def test_full_game(self):
        """Replay a full recorded game and check the scores after each round.

        Each record holds the tiles dealt in a round, the moves played in
        it and the expected scores of both players once it is scored.
        """
        game = Azul()
        state = game.get_init_state()

        # (tiles dealt, moves played, expected [player 0, player 1] scores)
        recordedRounds = [
            ('RWYYRYUURRWWKKYYWWUR',
             ['3K1', '4W3', '0Y3', '2W3', '5R2',
              '5Y1', '5W0', '1R0', '5U4', '5Y5'],
             [4, 3]),
            ('WWUUUURKKKRUYKRUKYYU',
             ['4Y3', '2K4', '0U4', '1U2', '3K0',
              '5Y0', '5R1', '5K4', '5U2', '5W1'],
             [19, 5]),
            ('URYKUWYYURYYKKURWWYK',
             ['4K4', '2Y1', '5U2', '1Y2', '0K0', '3R0',
              '5U1', '5Y2', '5K3', '5W4', '5R5'],
             [33, 16]),
            ('RKKWUUWWRRKUUKWWWKRR',
             ['3W2', '1W4', '2R4', '5U3', '5K3', '0R5',
              '5W2', '5K2', '4R1', '5K2', '5W0'],
             [45, 37]),
            ('KYYRRYWKYYUWWWKRYRKU',
             ['2U1', '4U0', '3W2', '5R4', '5K3', '5Y2', '5W2',
              '1R4', '5Y0', '5K1', '0K3', '5Y3', '5R4', '5W5'],
             [54, 55]),
        ]
        expectedFinalScores = [70, 71]

        # Convert the raw strings up front: first all tiles, then all moves.
        tilesPerRound = [[Azul.str_to_color(ch) for ch in rawTiles]
                         for rawTiles, _, _ in recordedRounds]
        movesPerRound = [[Move.from_str(rawMove) for rawMove in rawMoves]
                         for _, rawMoves, _ in recordedRounds]
        scoresPerRound = [roundScores for _, _, roundScores in recordedRounds]

        # Moves alternate between players 0 and 1 within a round. The game
        # tracks who starts each later round; only the very first starting
        # player is set here.
        state.nextPlayer = 0

        # Simulate the game and check the score each round.
        for tiles, moves, expectedScores in zip(tilesPerRound, movesPerRound,
                                                scoresPerRound):
            state = game.deal_round(state, tiles)

            for move in moves:
                state = game.apply_move_without_scoring(state, move).state

            self.assertTrue(game.is_round_end(state))
            state = game.score_round(state)

            self.assertEqual([p.score for p in state.players], expectedScores)

        state = game.score_game(state)
        self.assertEqual([p.score for p in state.players],
                         expectedFinalScores)