def test_deal_round_basic(self):
    """Dealing a round fills every regular bin, leaves the pool empty and resets turn state."""
    game = Azul()
    state = game.get_init_state()
    state.firstPlayer = 1
    state.poolWasTouched = True

    state = game.deal_round(state)

    # The designated first player opens the round and the pool flag is cleared.
    self.assertEqual(state.nextPlayer, 1)
    self.assertEqual(state.poolWasTouched, False)

    # Every bin except the last one (the pool) is full of real tiles.
    regularBins = state.bins[:-1]
    for regularBin in regularBins:
        self.assertEqual(sum(regularBin), Azul.BinSize)
        self.assertEqual(regularBin[Color.Empty], 0)
    emptyPool = [int(Color.Empty)] * (Azul.ColorNumber + 1)
    self.assertEqual(state.bins[-1], emptyPool)

    # Check that the total number of each color's tiles in all the bins
    # is exactly what's missing from the bag.
    dealtPerColor = np.array(state.bins).sum(axis=0)
    for colorIndex, dealt in enumerate(dealtPerColor):
        if Color(colorIndex) == Color.Empty:
            expectedInBag = 0
        else:
            expectedInBag = Azul.TileNumber - dealt
        self.assertEqual(state.bag[colorIndex], expectedInBag)
def test_dealing_round_refills_bag(self):
    """Dealing from an emptied bag leaves all tiles except the dealt ones in the bag."""
    game = Azul()
    state = game.get_init_state()
    # Zero out the bag while keeping its length.
    state.bag = np.zeros_like(np.array(state.bag)).tolist()

    state = game.deal_round(state)

    totalTiles = Azul.ColorNumber * Azul.TileNumber
    dealtTiles = Azul.BinNumber * Azul.BinSize
    tilesInBag = np.sum(np.array(state.bag))
    self.assertEqual(tilesInBag, totalTiles - dealtTiles)
def test_deal_round_one_color_left(self):
    """A bag holding only blue tiles is dealt out completely into the bins."""
    game = Azul()
    state = game.get_init_state()

    # Build a bag that contains nothing but the full set of blue tiles.
    onlyBlue = np.zeros(Azul.ColorNumber + 1, dtype=np.uint8)
    onlyBlue[Color.Blue] = Azul.TileNumber
    state.bag = onlyBlue.tolist()

    state = game.deal_round(state)

    blueDealt = np.sum(np.array(state.bins)[:, Color.Blue])
    emptyBag = [0] * (Azul.ColorNumber + 1)
    self.assertEqual(blueDealt, Azul.TileNumber)
    self.assertEqual(state.bag, emptyBag)
def __init__(self):
    """Set up a new game with the first round dealt and default bot settings."""
    # Init with defaults.
    super().__init__()

    # Bot settings.
    self.botPlayerIndex = 0
    # self.budget = 100000
    self.budget = 1000
    self.samplingWidth = 10
    self.explorationWeight = 20

    # Game objects: the rules engine, the current state and the undo history.
    self.azul = Azul()
    self.state = self.azul.deal_round(self.azul.get_init_state())
    self.history = []  # type: List[AzulState]
def test_enumerate_moves_basic(self):
    """Enumerated moves are all (source, target) combinations minus the blocked ones."""
    # Init an empty board, and init with a few tiles.
    game = Azul()
    state = game.get_init_state()
    initialTiles = [
        (0, Color.Red, 2),
        (0, Color.Blue, 1),
        (0, Color.Black, 1),
        (1, Color.White, 4),
    ]
    for binIndex, color, count in initialTiles:
        state.set_bin(binIndex, color, count)

    expectedSources = [(0, Color.Red), (0, Color.Blue), (0, Color.Black),
                       (1, Color.White)]
    # One target per queue plus one extra target index (Azul.WallSize).
    targetNumber = Azul.WallSize + 1
    expectedTargets = range(targetNumber)

    def assert_moves_match(sources, targets, exclude=None):
        """Check the enumerated moves equal sources x targets without `exclude`."""
        excluded = exclude if exclude else []
        actualMoves = list(game.enumerate_moves(state))
        candidates = [
            Move(binIndex, color, target)
            for (binIndex, color), target in itertools.product(sources, targets)
        ]
        for candidate in candidates:
            if candidate in excluded:
                continue
            self.assertIn(candidate, actualMoves)
        self.assertEqual(len(actualMoves), len(candidates) - len(excluded))

    assert_moves_match(expectedSources, expectedTargets)

    # Now put something in the pool, we should get extra moves.
    state.set_bin(Azul.BinNumber, Color.Yellow, 1)
    expectedSources.append((Azul.BinNumber, Color.Yellow))
    assert_moves_match(expectedSources, expectedTargets)

    # Fill up one of the queues, some moves should become invalid.
    state.players[0].set_queue(0, Color.White, 1)
    expectedTargets = range(1, targetNumber)
    assert_moves_match(expectedSources, expectedTargets)

    # Block a row of the wall, adding more invalid moves.
    # The set of targets stays the same; only one specific move is excluded.
    state.players[0].set_wall(1, 0, Azul.get_wall_slot_color(1, 0))
    blockedMoves = [Move(1, Color.White, 1)]
    assert_moves_match(expectedSources, expectedTargets, exclude=blockedMoves)
def build_greedy_bot():
    """Build a bot that picks the move with the best immediate tile statistics.

    Moves are ranked by the tuple ``(tilesDropped, -tilesMoved, extraSpace)``
    and the smallest tuple wins: drop as few tiles as possible, then move as
    many as possible, then leave as little free space in the queue as possible.

    Returns:
        A callable taking an ``AzulState`` and returning the chosen ``Move``.

    Raises:
        ValueError: (from the returned callable) if no moves are available.
    """
    azul = Azul()

    def _bot(state: AzulState) -> Move:
        player = state.players[state.nextPlayer]

        def get_move_stats(move: Move) -> Tuple[float, ...]:
            """Return the ranking key for `move`; lower is better."""
            tilesAvailable = state.bins[move.sourceBin][int(move.color)]
            if move.targetQueue == Azul.WallSize:
                # Target index WallSize is not one of the queues: every
                # taken tile is counted as dropped.
                return tilesAvailable, 0, 0

            # Queue i has capacity i + 1; the second queue entry is its fill count.
            spaceLeft = move.targetQueue + 1 - player.queue[move.targetQueue][1]
            tilesMoved = min(tilesAvailable, spaceLeft)
            tilesDropped = -min(0, spaceLeft - tilesAvailable)
            extraSpace = max(0, spaceLeft - tilesAvailable)
            return tilesDropped, -tilesMoved, extraSpace

        # Materialize the moves first: if enumerate_moves() yields lazily,
        # sharing one iterator between the moves and their stats would pair
        # each move with the stats of the following one.
        moves = list(azul.enumerate_moves(state))
        # min() returns the first of equally ranked moves, as before.
        return min(moves, key=get_move_stats)

    return _bot
def build_random_bot():
    """Build a bot that plays a random move out of the enumerated ones."""
    game = Azul()

    def _bot(state: AzulState) -> Move:
        availableMoves = tuple(game.enumerate_moves(state))
        return random.choice(availableMoves)

    return _bot
def test_refill_bag(self):
    """Refilling the bag leaves out tiles that sit on walls and in queues."""
    game = Azul()
    state = game.get_init_state()

    # Tiles held by the players must not return to the bag.
    wallRows = {
        0: [Color.Blue, Color.Yellow, Color.Red, Color.Empty, Color.Empty],
        1: [Color.Empty, Color.Blue, Color.Empty, Color.Empty, Color.Empty],
    }
    for rowIndex, rowColors in wallRows.items():
        state.players[0].set_wall_row(rowIndex, rowColors)
    state.players[1].set_queue(3, Color.Black, 2)

    game._refill_bag(state)

    fullCount = Azul.TileNumber
    expectedBag = [
        0,
        fullCount - 2,
        fullCount - 1,
        fullCount - 1,
        fullCount - 2,
        fullCount,
    ]
    self.assertEqual(state.bag, expectedBag)
def do_move(self, arg: str):
    """Parse ``<source bin> <color> <target queue>`` and apply it as a move.

    Tokens may be separated by any amount of whitespace; tokens after the
    third one are ignored. A malformed command prints ``# Invalid command``
    and leaves the game state untouched.

    Args:
        arg: The raw argument string of the ``move`` command.
    """
    # Parse the command.
    try:
        # Unpacking raises ValueError when fewer than three tokens are given,
        # so a short command is reported instead of crashing the command loop.
        sourceToken, colorToken, targetToken = arg.split()[:3]
        sourceBin = int(sourceToken)
        targetQueue = int(targetToken)
        color = Azul.str_to_color(colorToken)
    except (ValueError, KeyError):
        # KeyError covers the case where str_to_color rejects an unknown
        # color via a mapping lookup (assumption - confirm against Azul).
        print("# Invalid command")
        return

    self._apply_move(Move(sourceBin, color, targetQueue))
def __call__(self, state):
    """Choose a move for `state` by running a fresh bot for `self.budget` steps."""
    game = Azul()
    searchBot = self.botClass(
        game,
        state,
        samplingWidth=self.samplingWidth,
        explorationWeight=self.explorationWeight,
    )
    # Keep the bot reachable from the wrapper, as callers may inspect it.
    self.bot = searchBot

    for _ in range(self.budget):
        searchBot.step()

    return searchBot.get_best_move()
def test_hash(self):
    """States compare and hash by content and are usable as dict keys."""
    game = Azul()

    def assert_same(left, right):
        self.assertEqual(left, right)
        self.assertEqual(hash(left), hash(right))

    def assert_distinct(left, right):
        self.assertNotEqual(left, right)
        self.assertNotEqual(hash(left), hash(right))

    # Build a reference state with a modified bin and one wall tile.
    state1 = game.get_init_state()
    modifiedBins = state1.bins
    modifiedBins[1] = [0, 2, 3, 4, 5, 6]
    state1.bins = modifiedBins
    state1.players[0].set_wall(1, 1, Azul.get_wall_slot_color(1, 1))

    # A copy is equal until any part of it is changed.
    state2 = state1.copy()
    assert_same(state1, state2)

    state2.set_bin(1, Color.Blue, 5)
    assert_distinct(state1, state2)

    state2 = state1.copy()
    state2.players[1].score = 1
    assert_distinct(state1, state2)

    state2 = state1.copy()
    state2.players[1].set_queue(0, Color.Blue, 1)
    assert_distinct(state1, state2)

    # Trying using as keys in a dict. This shouldn't throw.
    d1 = {state1: 'a1', state2: 'a2'}
    d2 = d1.copy()
    d1[state1] = 'a1'
    self.assertEqual(d1, d2)
    d1[state2] = 'a3'
    self.assertNotEqual(d1, d2)
def test_score_round(self):
    """Scoring a round updates scores, clears completed queues and resets the floors."""
    game = Azul()
    state = game.get_init_state()

    # Player 0: some tiles already on the wall, all five queues in use.
    for row, col, color in (
            (0, 3, Color.Black),
            (1, 0, Color.White),
            (1, 1, Color.Blue),
            (1, 2, Color.Yellow),
    ):
        state.players[0].set_wall(row, col, color)
    for queueIndex, color, count in (
            (0, Color.White, 1),  # Scores 2.
            (1, Color.Red, 2),  # Scores 6.
            (2, Color.Black, 3),  # Scores 2.
            (3, Color.Red, 3),  # Scores 0.
            (4, Color.Blue, 5),  # Scores 1.
    ):
        state.players[0].set_queue(queueIndex, color, count)
    state.players[0].floorCount = 3

    # Player 1: one wall row and one wall column, each with a gap.
    state.players[1].set_wall_row(
        3, [Color.Red, Color.Black, Color.Empty, Color.Blue, Color.Yellow])
    state.players[1].set_wall_col(
        2, [Color.Red, Color.Yellow, Color.Blue, Color.Empty, Color.Black])
    state.players[1].set_queue(0, Color.Yellow, 1)  # Scores 2.
    state.players[1].set_queue(3, Color.White, 4)  # Scores 10
    state.players[1].floorCount = 1

    state = game.score_round(state)

    self.assertEqual(state.players[0].score, 7)
    self.assertEqual(state.players[1].score, 11)

    # Player 0 keeps only the queue that was not filled up.
    firstQueues = state.players[0].queue
    self.assertEqual(firstQueues[0:3], [[0, 0]] * 3)
    self.assertEqual(firstQueues[3], [int(Color.Red), 3])
    self.assertEqual(firstQueues[4], [0, 0])

    # Player 1 has no queue content left at all.
    secondQueues = state.players[1].queue
    self.assertEqual(secondQueues,
                     np.zeros_like(np.array(secondQueues)).tolist())

    for playerIndex in (0, 1):
        self.assertEqual(state.players[playerIndex].floorCount, 0)
def test_playout(self):
    """A playout takes a fresh, unfinished game to a finished one."""
    game = Azul()
    state = game.get_init_state()
    gameOverBefore = game.is_game_end(state)
    self.assertFalse(gameOverBefore)

    state = game.playout(state)

    gameOverAfter = game.is_game_end(state)
    self.assertTrue(gameOverAfter)
def test_score_game(self):
    """End-of-game scoring awards one row, one column and one color bonus."""
    game = Azul()
    state = game.get_init_state()

    # Fill the main diagonal (all blue), the first row and the first column.
    wallCells = []
    for i in range(Azul.WallSize):
        wallCells.extend([(i, i), (i, 0), (0, i)])
    for row, col in wallCells:
        state.players[0].set_wall(row, col, Azul.get_wall_slot_color(row, col))

    state = game.score_game(state)

    expectedScore = Azul.ScorePerRow + Azul.ScorePerColumn + Azul.ScorePerColor
    self.assertEqual(state.players[0].score, expectedScore)
def test_is_end_of_game(self):
    """The game ends only once some player has a completely filled wall row."""
    game = Azul()
    state = game.get_init_state()
    self.assertFalse(game.is_game_end(state))

    # (player index, wall setter, row/column index, colors, game over afterwards?)
    steps = (
        # A row with one gap is not enough.
        (0, 'set_wall_row', 4,
         [Color.Yellow, Color.Red, Color.Black, Color.White, Color.Empty],
         False),
        # Neither is a complete column.
        (0, 'set_wall_col', 0,
         [Color.Blue, Color.White, Color.Black, Color.Red, Color.Yellow],
         False),
        # A complete row of the other player ends the game.
        (1, 'set_wall_row', 4,
         [Color.Yellow, Color.Red, Color.Black, Color.White, Color.Blue],
         True),
    )
    for playerIndex, setterName, lineIndex, colors, expectGameEnd in steps:
        setter = getattr(state.players[playerIndex], setterName)
        setter(lineIndex, colors)
        check = self.assertTrue if expectGameEnd else self.assertFalse
        check(game.is_game_end(state))
def postcmd(self, stop: bool, line: str) -> bool:
    """Print the game state after each command, then run the default hook."""
    Azul.print_state(self.state)
    shouldStop = super().postcmd(stop, line)
    return shouldStop
def preloop(self) -> None:
    """Run the default pre-loop hook and show the initial game state."""
    super().preloop()
    currentState = self.state
    Azul.print_state(currentState)
def main():
    """Play the greedy bot against an MCTS bot over a parameter grid and report results.

    For every configuration produced by the grid search, a series of games is
    played with the greedy bot as player 0 and the MCTS wrapper as player 1.
    Per-game scores are collected, summary statistics are printed, and at the
    end a box plot (PDF) and a tab-separated metrics table are written to the
    output directory.

    The output directory is ``$DEV_OUT_PATH/azul_bot`` when ``DEV_OUT_PATH``
    is set, otherwise the current working directory.
    """
    gamesToPlay = 30
    maxRoundsPerGame = 100
    samplingWidth = 10
    botClass = MctsBotCpp
    # botClass = MctsBotPy

    # Axes of the parameter grid to evaluate.
    searchManager = GridSearchManager(defaultConfig={})
    # searchManager.add_param_axis('mctsBudget', [100, 1000, 10000, 100000, 300000])
    searchManager.add_param_axis('mctsBudget', [100, 1000, 10000])
    # searchManager.add_param_axis('explorationWeight', [20, 30, 50])
    searchManager.add_param_axis('explorationWeight', [20])

    # The conditional applies to the whole expression: either
    # Path(env) / 'azul_bot' or Path.cwd().
    outDirPath = Path(
        os.environ['DEV_OUT_PATH']
    ) / 'azul_bot' if 'DEV_OUT_PATH' in os.environ else Path.cwd()
    outDirPath.mkdir(parents=True, exist_ok=True)

    # One row per player per finished game, accumulated over all configurations.
    resultRows = []
    for config, _, _ in searchManager.generate_configuration():
        azul = Azul()
        # players = [build_random_bot(), build_mcts_bot(mctsBudget)]
        players = [
            build_greedy_bot(),
            MctsBotWrapper(config['mctsBudget'], samplingWidth, config['explorationWeight'], botClass=botClass)
        ]

        scores = []
        timePerMove = []
        timer = StageTimer()
        for iGame in range(gamesToPlay):
            timer.start_pass()
            state = azul.get_init_state()

            roundCount = 0
            moveCount = 0
            # Cap the number of rounds so a game that never ends is abandoned.
            for _ in range(maxRoundsPerGame):
                timer.start_stage('deal')
                state = azul.deal_round(state)

                while not azul.is_round_end(state):
                    timer.start_stage('decide')
                    move = players[state.nextPlayer](state)
                    timer.start_stage('move')
                    state = azul.apply_move_without_scoring(state, move).state
                    # Counts only the moves after which player 1 is next to move.
                    moveCount += 1 if state.nextPlayer == 1 else 0

                timer.start_stage('score')
                state = azul.score_round(state)
                roundCount += 1

                if azul.is_game_end(state):
                    state = azul.score_game(state)
                    break

            # NOTE(review): assumed to run once per game, after the round loop - confirm.
            timer.end_stage()

            if azul.is_game_end(state):
                timer.end_pass()
                dur = timer.get_pass_duration()
                scores.append([state.players[0].score, state.players[1].score])
                # Total 'decide' time of this game divided by the counted moves.
                timePerMove.append(timer.get_pass_timings()['decide'] / moveCount)
                print(
                    f"Finished a game with scores {state.players[0].score}:{state.players[1].score}"
                    f" in {roundCount} rounds and {dur:.2f} s.")
            else:
                # The round limit was hit before the game ended; no score is recorded.
                print("Timed out playing a game.")

        timer.end()

        # Column 0 holds the greedy bot's scores, column 1 the MCTS bot's.
        scoresArray = np.array(scores)
        scoresAvg = scoresArray.mean(axis=0)
        winsFirst = np.count_nonzero(scoresArray[:, 0] > scoresArray[:, 1])
        winsSecond = np.count_nonzero(scoresArray[:, 0] < scoresArray[:, 1])
        # Could be a draw.

        for i in range(scoresArray.shape[0]):
            resultRows.append({
                'player': 'greedy',
                'budget': 0,
                'explorationWeight': 0,
                'score': scoresArray[i, 0],
                'isWin': scoresArray[i, 0] > scoresArray[i, 1]
            })
            resultRows.append({
                'player': 'mcts',
                'budget': config['mctsBudget'],
                'explorationWeight': config['explorationWeight'],
                'score': scoresArray[i, 1],
                'isWin': scoresArray[i, 0] < scoresArray[i, 1]
            })

        print(timer.get_total_report())
        print("Average scores: {:.1f} {:.1f}".format(*tuple(scoresAvg)))
        print("Average time per move: {:.1f}".format(
            np.mean(np.array(timePerMove))))
        print("Wins: {} vs {}".format(winsFirst, winsSecond))

    print("Plotting.")
    resultTable = pd.DataFrame(resultRows)
    print(resultTable)
    # resultTable = resultTable[resultTable['player'] == 'mcts']
    ax = sns.boxplot(x='budget', y='score', hue='explorationWeight', data=resultTable)

    # Timestamp the output files so repeated runs do not overwrite each other.
    date = datetime.now()
    dateStr = date.strftime("%y%m%d-%H%M%S")
    ax.get_figure().savefig(outDirPath / f'{dateStr}_scores.pdf')
    plt.show()

    resultTable.to_csv(outDirPath / f'{dateStr}_metrics.csv', sep='\t')

    print("Done.")
def test_full_game(self):
    """Replay a recorded five-round game and check the scores after every round."""
    # Test a full recorded game.
    game = Azul()
    state = game.get_init_state()

    # Tiles dealt in each round, one character per tile.
    tilesPerRoundRaw = [
        'RWYYRYUURRWWKKYYWWUR', 'WWUUUURKKKRUYKRUKYYU',
        'URYKUWYYURYYKKURWWYK', 'RKKWUUWWRRKUUKWWWKRR',
        'KYYRRYWKYYUWWWKRYRKU'
    ]
    tilesPerRound = [
        [Azul.str_to_color(tileChar) for tileChar in roundTiles]
        for roundTiles in tilesPerRoundRaw
    ]

    # Moves alternate between players 0 and 1 within a round.
    # The game keeps track of which player goes first each round, we only specify the very first.
    firstPlayer = 0
    movesPerRoundRaw = [
        ['3K1', '4W3', '0Y3', '2W3', '5R2', '5Y1', '5W0', '1R0', '5U4', '5Y5'],
        ['4Y3', '2K4', '0U4', '1U2', '3K0', '5Y0', '5R1', '5K4', '5U2', '5W1'],
        ['4K4', '2Y1', '5U2', '1Y2', '0K0', '3R0', '5U1', '5Y2', '5K3', '5W4',
         '5R5'],
        ['3W2', '1W4', '2R4', '5U3', '5K3', '0R5', '5W2', '5K2', '4R1', '5K2',
         '5W0'],
        ['2U1', '4U0', '3W2', '5R4', '5K3', '5Y2', '5W2', '1R4', '5Y0', '5K1',
         '0K3', '5Y3', '5R4', '5W5'],
    ]
    movesPerRound = [
        [Move.from_str(moveText) for moveText in roundMoves]
        for roundMoves in movesPerRoundRaw
    ]

    scoresPerRound = [[4, 3], [19, 5], [33, 16], [45, 37], [54, 55]]
    finalScores = [70, 71]

    def current_scores():
        return [player.score for player in state.players]

    # Simulate the game and check the score each round.
    state.nextPlayer = firstPlayer
    recordedRounds = zip(tilesPerRound, movesPerRound, scoresPerRound)
    for roundTiles, roundMoves, expectedScores in recordedRounds:
        state = game.deal_round(state, roundTiles)
        for move in roundMoves:
            state = game.apply_move_without_scoring(state, move).state

        self.assertTrue(game.is_round_end(state))
        state = game.score_round(state)
        self.assertEqual(current_scores(), expectedScores)

    state = game.score_game(state)
    self.assertEqual(current_scores(), finalScores)
class AzulCmd(cmd.Cmd):
    """Interactive command shell for playing Azul against the MCTS bot.

    Commands:
        move <source bin> <color> <target queue>: apply a move.
        bot_move: let the bot search for a move and apply it.
        undo: restore the state from before the last applied move.

    The game state is printed before the loop starts and after every command.
    """

    prompt = '> '

    def __init__(self):
        """Set up a new game with the first round dealt and default bot settings."""
        # Init with defaults.
        super().__init__()
        self.history = []  # type: List[AzulState]
        self.azul = Azul()
        self.state = self.azul.get_init_state()
        self.state = self.azul.deal_round(self.state)

        self.botPlayerIndex = 0
        # self.budget = 100000
        # Passed to the bot's step_n() as the number of search steps per move.
        self.budget = 1000
        self.samplingWidth = 10
        self.explorationWeight = 20

    @staticmethod
    def _winner_index(firstScore, secondScore) -> int:
        """Return the index (0 or 1) of the player with the higher score.

        A tie is attributed to player 0.
        """
        return int(secondScore > firstScore)

    def preloop(self) -> None:
        """Run the default pre-loop hook and show the initial game state."""
        super().preloop()
        Azul.print_state(self.state)

    def postcmd(self, stop: bool, line: str) -> bool:
        """Print the game state after each command, then run the default hook."""
        Azul.print_state(self.state)
        return super().postcmd(stop, line)

    def do_move(self, arg: str):
        """Parse ``<source bin> <color> <target queue>`` and apply it as a move.

        Tokens may be separated by any amount of whitespace; tokens after the
        third one are ignored. A malformed command prints ``# Invalid command``
        and leaves the game state untouched.
        """
        # Parse the command.
        try:
            # Unpacking raises ValueError when fewer than three tokens are
            # given, so a short command no longer crashes the command loop.
            sourceToken, colorToken, targetToken = arg.split()[:3]
            sourceBin = int(sourceToken)
            targetQueue = int(targetToken)
            color = Azul.str_to_color(colorToken)
        except (ValueError, KeyError):
            # KeyError covers the case where str_to_color rejects an unknown
            # color via a mapping lookup (assumption - confirm against Azul).
            print("# Invalid command")
            return

        self._apply_move(Move(sourceBin, color, targetQueue))

    def do_bot_move(self, arg: str):
        """Let a freshly built MCTS bot pick a move for the current state and apply it."""
        bot = MctsBot(self.azul,
                      self.state,
                      samplingWidth=self.samplingWidth,
                      explorationWeight=self.explorationWeight)
        move = bot.step_n(self.budget)

        print("Bot's move: ")
        print(
            f"Take {Azul.color_to_str(move.color)} from bin {move.sourceBin} to queue {move.targetQueue}"
        )

        self._apply_move(move)

    def _apply_move(self, move):
        """Apply `move`, advancing the round and the game when they finish.

        The previous state is pushed onto the history first, so the move can
        be undone. If applying the move raises ``ValueError``, the error is
        printed and the previous state is restored.
        """
        if self.azul.is_game_end(self.state):
            print("The game is over, can't do moves.")
            return

        self.history.append(self.state)

        # Apply the move.
        try:
            outcome = self.azul.apply_move_without_scoring(self.state, move)
            self.state = outcome.state

            if self.azul.is_round_end(self.state):
                self.state = self.azul.score_round(self.state)

                if not self.azul.is_game_end(self.state):
                    self.state = self.azul.deal_round(self.state)
                else:
                    self.state = self.azul.score_game(self.state)
                    players = self.state.players
                    # The winner is the player with the HIGHER score; the
                    # previous comparison yielded the index of the loser.
                    winnerIndex = self._winner_index(players[0].score,
                                                     players[1].score)
                    humanIndex = 1 - self.botPlayerIndex

                    print("=== Game Over! ===")
                    print(
                        f"{'Human' if winnerIndex != self.botPlayerIndex else 'Bot'} player wins"
                    )
                    print(
                        f"Scores: Bot = {self.state.players[self.botPlayerIndex].score} "
                        f"Human = {self.state.players[humanIndex].score}")

        except ValueError as e:
            print(f"# {e}")
            print("Undoing the move.")
            self.state = self.history.pop()

    def do_undo(self, arg: str):
        """Restore the state saved before the most recently applied move."""
        if len(self.history) == 0:
            print("# This is the first turn.")
            return

        print("# UNDO #")
        self.state = self.history.pop()
def test_apply_move_sequence(self):
    """Apply a short sequence of moves and verify bins, queues, floors and turn order."""
    # This case is taken from the rulebook.
    game = Azul()
    state = game.get_init_state()

    def play(current, move):
        """Apply `move` without scoring and return the resulting state."""
        return game.apply_move_without_scoring(current, move).state

    def assert_queues(player, expectedByIndex):
        """Listed queues must match exactly; all other queues must be empty."""
        for queueIndex, queue in enumerate(player.queue):
            self.assertEqual(queue, expectedByIndex.get(queueIndex, [0, 0]))

    def assert_wall_untouched(player):
        wall = np.array(player.wall, dtype=np.int32)
        self.assertEqual(np.count_nonzero(wall), 0)

    for binIndex, color, count in (
            (0, Color.Yellow, 1),
            (0, Color.Black, 2),
            (0, Color.White, 1),
            (1, Color.Yellow, 1),
            (1, Color.Red, 3),
    ):
        state.set_bin(binIndex, color, count)
    state.firstPlayer = 1  # We will change it and check later.

    state = play(state, Move(0, Color.Black, 1))

    # The pool should hold the leftovers.
    self.assertEqual(state.bins[-1], [0, 0, 1, 0, 0, 1])  # See 'Color'.
    # The bin should be empty
    self.assertEqual(state.bins[0], [0] * (Azul.ColorNumber + 1))
    # The other bin shouldn't change.
    self.assertEqual(state.bins[1], [0, 0, 1, 3, 0, 0])

    # The queue should only hold black.
    assert_queues(state.players[0], {1: [int(Color.Black), 2]})
    # Nothing should be on the floor.
    self.assertEqual(state.players[0].floorCount, 0)
    # The wall shouldn't be affected.
    assert_wall_untouched(state.players[0])

    # Player two shouldn't be affected.
    self.assertEqual(np.count_nonzero(state.players[1].queue), 0)

    # The next player is to be updated.
    self.assertEqual(state.nextPlayer, 1)

    # Make a few more moves.
    for followUp in (Move(1, Color.Yellow, 2),
                     Move(Azul.BinNumber, Color.Red, 3)):
        state = play(state, followUp)

    # Check the pool.
    self.assertEqual(state.bins[-1], [0, 0, 1, 0, 0, 1])
    self.assertEqual(state.poolWasTouched, True)

    # Check the first player queues.
    assert_queues(state.players[0], {
        1: [int(Color.Black), 2],
        3: [int(Color.Red), 3],
    })
    # Check the second player queues.
    assert_queues(state.players[1], {2: [int(Color.Yellow), 1]})

    # Check the floors.
    self.assertEqual(state.players[0].floorCount, 1)
    self.assertEqual(state.players[1].floorCount, 0)

    # The wall shouldn't be affected.
    assert_wall_untouched(state.players[0])
    assert_wall_untouched(state.players[1])

    # Check the next player.
    self.assertEqual(state.nextPlayer, 1)
    # Check who goes first next round.
    self.assertEqual(state.firstPlayer, 0)