class OptimalPlaySimulationTests(unittest.TestCase):
    """Cross-check computed expected scores against simulated turns.

    Each test plays a large number of turns with ``play_turn`` using the
    selector under test and compares the average simulated score with a
    reference value, allowing an absolute deviation of 0.01.
    """

    def setUp(self):
        self.action_selector = OptimalActionSelector()

    def tearDown(self):
        pass

    def _simulate_turns(self, side_dice, num_runs=100000):
        """Play ``num_runs`` turns that all start from ``side_dice``.

        Returns a ``(histogram, mean)`` tuple. ``histogram`` is a list of
        ``(score, count)`` pairs sorted by score and ``mean`` is the average
        score over all simulated turns.
        """
        # groupby only merges adjacent equal items, hence the sort.
        outcomes = sorted(
            play_turn(self.action_selector, ini_side_dice=side_dice)
            for _ in range(num_runs)
        )
        histogram = [
            (score, sum(1 for _ in group))
            for score, group in itertools.groupby(outcomes)
        ]
        mean = sum(score * count for score, count in histogram) / num_runs
        return histogram, mean

    def testExpectedScoreOfThreeDieThrow(self):
        expected_score = self.action_selector.expected_score(
            SearchState(0, 8, 2, 1, 0))

        side_dice = SideDiceState({DieFace.Ray: 8, DieFace.Cow: 2})
        scores, simulated_score = self._simulate_turns(side_dice)

        print(scores, simulated_score, expected_score)
        self.assertAlmostEqual(expected_score, simulated_score, delta=0.01)

    def testExpectedScoreOfThreeDieThrow2(self):
        expected_score = self.action_selector.expected_score(
            SearchState(4, 4, 2, 1, 0))

        side_dice = SideDiceState({
            DieFace.Tank: 4,
            DieFace.Ray: 4,
            DieFace.Chicken: 2,
        })
        scores, simulated_score = self._simulate_turns(side_dice)

        print(scores, simulated_score, expected_score)
        self.assertAlmostEqual(expected_score, simulated_score, delta=0.01)

    def testSimulator(self):
        side_dice = SideDiceState({DieFace.Ray: 9, DieFace.Cow: 2})
        scores, avg_score = self._simulate_turns(side_dice)

        print(scores)
        print(avg_score)
        # Reference value written out as a closed-form expression.
        self.assertAlmostEqual(avg_score, (314 / 3) / 36, delta=0.01)
def setUp(self):
    """Create a selector configured with ``consider_win_score=True``."""
    selector = OptimalActionSelector(consider_win_score=True)
    self.action_selector = selector
class OptimalPlayUnitTests(unittest.TestCase):
    """Unit tests for ``OptimalActionSelector`` with win-score handling on.

    The selector is created with ``consider_win_score=True``. Expected
    values are written as explicit probability-weighted sums so the
    derivation stays visible in the test.
    """

    def setUp(self):
        self.action_selector = OptimalActionSelector(consider_win_score=True)

    def tearDown(self):
        pass

    def _expected(self, *state_args):
        """Return the selector's expected score for ``SearchState(*state_args)``.

        NOTE(review): the positional arguments appear to be (tanks, rays,
        earthlings, earthling types, win score) -- confirm against the
        ``SearchState`` definition.
        """
        return self.action_selector.expected_score(SearchState(*state_args))

    def testExpectedScoreOfOneDieThrow(self):
        self.assertAlmostEqual(
            self._expected(0, 10, 2, 1, 0), (2 * 3 + 4 * 2) / 6)

    def testExpectedScoreOfOneDieThrow2(self):
        self.assertAlmostEqual(
            self._expected(0, 10, 2, 2, 0), (1 * 6 + 5 * 2) / 6)

    def testExpectedScoreOfOneDieThrow3(self):
        self.assertAlmostEqual(
            self._expected(5, 5, 2, 1, 0), (2 * 3 + 3 * 2 + 1 * 0) / 6)

    def testExpectedScoreOfOneDieThrow4(self):
        self.assertAlmostEqual(
            self._expected(0, 9, 3, 2, 0), (1 * 7 + 5 * 3) / 6)

    def testExpectedScoreOfOneDieThrow5(self):
        # Same state and expectation as testExpectedScoreOfOneDieThrow.
        self.assertAlmostEqual(
            self._expected(0, 10, 2, 1, 0), (2 * 3 + 4 * 2) / 6)

    def testExpectedScoreOfTwoDieThrow1(self):
        self.assertAlmostEqual(
            self._expected(0, 9, 2, 1, 0), (314 / 3) / 36)

    def testExpectedScoreOfTwoDieThrow2(self):
        self.assertAlmostEqual(
            self._expected(3, 3, 5, 2, 0),
            (10 * 1 + 9 * 8 + 5 * 12 + (5 + 2 / 3) * 8 + 0 * 7) / 36)

    def testExpectedScoreOfTwoDieThrow3(self):
        # Similar to previous, but in this case, best move is to pass
        self.assertAlmostEqual(self._expected(2, 2, 7, 2, 0), 7)

    def testExpectedScoreOfTwoDieThrow_NoAsserts1(self):
        # Smoke test: must not trip any of the selector's built-in asserts.
        self._expected(5, 6, 0, 0, 0)

    def testExpectedScoreOfTwoDieThrow_NoAsserts2(self):
        # Smoke test: must not trip any of the selector's built-in asserts.
        self._expected(0, 8, 3, 2, 0)

    def testExpectedScoreOfFourDie(self):
        # Smoke test: must not trip any of the selector's built-in asserts.
        self._expected(5, 4, 0, 0, 0)

    def testExpectedScoreOfFiveDie(self):
        # Smoke test: must not trip any of the selector's built-in asserts.
        self._expected(5, 0, 3, 2, 0)

    def testShouldStopWhenWinningEvenWhenOddsAreGood(self):
        state = TurnState(side_dice=SideDiceState({
            DieFace.Ray: 5,
            DieFace.Cow: 2,
        }))

        self.assertTrue(self.action_selector.should_stop(state, 1))
        self.assertTrue(self.action_selector.should_stop(state, 2))
        self.assertFalse(self.action_selector.should_stop(state, 3))
        self.assertFalse(self.action_selector.should_stop(state))

    def testDieChoiceDependsOnWinScore1(self):
        # State with four different choices depending on win score
        state = TurnState(
            side_dice=SideDiceState({DieFace.Tank: 2}),
            throw=DiceThrow({
                DieFace.Ray: 2,
                DieFace.Cow: 2,
                DieFace.Human: 3,
                DieFace.Chicken: 4,
            }))

        self.assertEqual(
            DieFace.Chicken, self.action_selector.select_die(state))
        for win_score, die in (
                (1, DieFace.Ray),
                (2, DieFace.Cow),
                (3, DieFace.Human)):
            self.assertEqual(
                die, self.action_selector.select_die(state, win_score))

    def testDieChoiceDependsOnWinScore2(self):
        # State where choice deviates only for a couple of higher win scores.
        # It is more typical that there is a deviation for lower win scores
        # up until a limit.
        state = TurnState(
            side_dice=SideDiceState({DieFace.Tank: 1}),
            throw=DiceThrow({
                DieFace.Ray: 4,
                DieFace.Cow: 1,
                DieFace.Human: 3,
                DieFace.Chicken: 4,
            }))

        self.assertEqual(DieFace.Ray, self.action_selector.select_die(state))
        for win_score, die in (
                (3, DieFace.Ray),
                (4, DieFace.Chicken),
                (5, DieFace.Chicken)):
            self.assertEqual(
                die, self.action_selector.select_die(state, win_score))

        # The same preference flip must show up in the underlying scores.
        score_ray0 = self._expected(1, 4, 0, 0, 0)
        score_ray4 = self._expected(1, 4, 0, 0, 4)
        score_chicken0 = self._expected(1, 0, 4, 1, 0)
        score_chicken4 = self._expected(1, 0, 4, 1, 4)

        self.assertGreater(score_ray0, score_chicken0)
        self.assertGreater(score_chicken4, score_ray4)
        self.assertGreaterEqual(score_ray0, score_ray4)
        self.assertGreaterEqual(score_chicken0, score_chicken4)

    def testDieChoiceDependsOnWinScore3(self):
        # State where choice deviates for low and high, but not medium scores
        state = TurnState(
            side_dice=SideDiceState({DieFace.Tank: 1}),
            throw=DiceThrow({
                DieFace.Ray: 5,
                DieFace.Cow: 1,
                DieFace.Human: 2,
                DieFace.Chicken: 4,
            }))

        self.assertEqual(
            DieFace.Chicken, self.action_selector.select_die(state))
        for win_score, die in (
                (4, DieFace.Chicken),
                (5, DieFace.Chicken),
                (6, DieFace.Chicken),
                (1, DieFace.Ray),
                (2, DieFace.Ray),
                (3, DieFace.Ray),
                (7, DieFace.Ray),
                (8, DieFace.Ray)):
            self.assertEqual(
                die, self.action_selector.select_die(state, win_score))
import itertools import json from service.BaseHandler import GameHandler, HandlerException, ok_message, error_message from service.Common import is_bot_name, Config from service.GameState import GameState from game.DataTypes import TurnState, TurnPhase, DieFace from game.Game import RandomPlayer, AggressivePlayer, DefensivePlayer from game.OptimalPlay import OptimalActionSelector bot_behaviours = { "random": RandomPlayer(), "aggressive": AggressivePlayer(), "defensive": DefensivePlayer(), "smart": OptimalActionSelector() } str2die = { "ray": DieFace.Ray, "chicken": DieFace.Chicken, "cow": DieFace.Cow, "human": DieFace.Human } class GamePlayHandler(GameHandler): """Handles game play.""" def check_expect_move(self, game_state): if game_state is None or not game_state.awaits_input: raise HandlerException(f"Not awaiting a move") def check_my_move(self, game_state):
logger = logging.getLogger('game.GameStats')


def build_pvp_matrix(players, num_runs):
    """Estimate a player-versus-player win-rate matrix.

    In each of the ``num_runs`` rounds every ordered pair of players,
    including a player paired with itself, plays one game via ``play_game``.

    Returns a square array whose entry ``[i, j]`` is the fraction of rounds
    in which ``play_game([players[i], players[j]])`` returned 0, which this
    code counts as a win for the first-listed player.
    """
    num_players = len(players)
    wins = np.zeros((num_players, num_players))

    for run in range(num_runs):
        logger.info("Run %d", run)
        for first, second in itertools.product(range(num_players), repeat=2):
            if play_game([players[first], players[second]]) == 0:
                wins[first, second] += 1

    # Convert raw win counts to win rates.
    wins /= num_runs
    return wins


if __name__ == '__main__':
    game_logger = logging.getLogger('game')
    game_logger.setLevel(logging.INFO)
    game_logger.addHandler(logging.StreamHandler())

    plain_optimal = OptimalActionSelector(consider_win_score=False)
    win_aware_optimal = OptimalActionSelector(consider_win_score=True)
    contestants = [
        RandomPlayer(),
        AggressivePlayer(),
        DefensivePlayer(),
        plain_optimal,
        win_aware_optimal,
    ]

    matrix = build_pvp_matrix(contestants, 1000)
    print(matrix)

    # Result from run with num_runs = 100000
    # [[0.52269 0.46619 0.14833 0.07076 0.06384]
    #  [0.57006 0.51936 0.26260 0.15804 0.15225]
    #  [0.88807 0.78197 0.55335 0.29518 0.27899]
    #  [0.95299 0.87960 0.79029 0.55612 0.54123]
    #  [0.95552 0.88500 0.80395 0.57171 0.55634]]
def __init__(self, show_hint=False):
    """Set up the optional hint generator.

    An ``OptimalActionSelector`` is created only when ``show_hint`` is
    truthy; otherwise ``hint_generator`` is ``None``.
    """
    if show_hint:
        self.hint_generator = OptimalActionSelector()
    else:
        self.hint_generator = None
class HumanPlayer:
    """Interactive player whose decisions are read from standard input.

    When constructed with ``show_hint=True`` an ``OptimalActionSelector`` is
    used to print the expected score of every available choice before the
    player is asked to pick a die.
    """

    def __init__(self, show_hint=False):
        self.hint_generator = OptimalActionSelector() if show_hint else None

    def show_options(self, options):
        """Print the given dice on one line as ``[key] name`` pairs."""
        print(" ".join(
            "[%s] %s" % (die2key[option], option.name) for option in options))

    def show_hint(self, state: TurnState):
        """Print every available choice with its expected score, best first.

        Must only be called when a hint generator was configured.
        """
        tanks = state.side_dice[DieFace.Tank]
        rays = state.side_dice[DieFace.Ray]
        num_earthlings = state.side_dice.num_earthlings
        num_earthling_types = len(state.side_dice.collected_earthlings)
        thrown_rays = state.throw[DieFace.Ray]

        # Earthling choices are keyed by how many dice would be taken: two
        # earthling types thrown in equal numbers lead to the same search
        # state, so they are evaluated (and listed) only once.
        earthling_counts = {
            state.throw[die] for die in state.selectable_earthlings}
        scores = [
            (self.hint_generator.expected_score(
                SearchState(tanks, rays, num_earthlings + count,
                            num_earthling_types + 1)),
             count)
            for count in earthling_counts
        ]

        # Taking the rays is recorded with an action value of 0.
        if thrown_rays > 0:
            scores.append((
                self.hint_generator.expected_score(
                    SearchState(tanks, rays + thrown_rays, num_earthlings,
                                num_earthling_types)),
                0))

        for score, action in sorted(scores, key=lambda x: x[0], reverse=True):
            if action > 0:
                die = next(die for die in state.selectable_earthlings
                           if state.throw[die] == action)
                choice = "%d [%s] Earthling%s" % (
                    action, die2key[die], "s" if action > 1 else "")
            else:
                choice = "%d [R] Ray%s" % (
                    thrown_rays, "s" if thrown_rays > 1 else "")
            print("%.3f %s" % (score, choice))

    def select_die(self, state: TurnState):
        """Ask the user which die face to take and return it.

        The prompt repeats until the user enters a key that maps to one of
        the dice that can actually be selected in ``state``.
        """
        # Work on a copy: appending Ray must not alter the list that the
        # state hands out, which show_hint() reads again below.
        options = list(state.selectable_earthlings)
        if state.throw[DieFace.Ray] > 0:
            options.append(DieFace.Ray)

        if self.hint_generator is not None:
            self.show_hint(state)
        else:
            self.show_options(options)

        while True:
            key = input("Your choice : ").upper()
            # Only accept keys for dice that are on offer right now.
            if key in key2die and key2die[key] in options:
                return key2die[key]

    def should_stop(self, state: TurnState):
        """Ask whether to continue; return True when the user answers N."""
        while True:
            choice = input("Continue (Y/N)? : ").upper()
            if choice in ("Y", "N"):
                return choice == "N"

    def __str__(self):
        return "HumanPlayer"
def setUp(self):
    """Create a default-configured ``OptimalActionSelector`` for the test."""
    selector = OptimalActionSelector()
    self.action_selector = selector