def test_cards():
    """Check that dealt cards stay in range and the deck size reflects the discard."""
    configurations = (
        {'low_card': 3, 'high_card': 35, 'discard': 9},
        {'low_card': 1, 'high_card': 10, 'discard': 2},
    )
    for config in configurations:
        lowest = config['low_card']
        highest = config['high_card']
        game = nothanks.Game([], **config)

        # Drain the deck, validating every card as it is dealt.
        dealt = 0
        while game.deck:
            dealt += 1
            assert lowest <= game.deal_card() <= highest

        # The inclusive range holds highest + 1 - lowest cards; the discarded
        # ones must be missing from the deck.
        assert dealt == highest + 1 - lowest - config['discard']
def test_bad_player():
    """Ensure exceptions raised in player actions are caught by nothanks.Game.

    ``BadPlayer`` overrides each player hook with a function that accepts no
    parameters at all (not even ``self``), so calling any of them on an
    instance fails with ``TypeError``.  The test first confirms that
    directly, then runs a game that includes the bad player;
    ``game.run()`` must return without raising.
    """

    class BadPlayer(nothanks.Player):
        def play():  # raises TypeError because of bad argument count
            pass

        def update():  # raises TypeError because of bad argument count
            pass

        def prepare_for_new_game():  # raises TypeError because of bad argument count
            pass

    bad_player = BadPlayer()
    players = [nothanks.Player(), nothanks.Player(), bad_player]
    game = nothanks.Game(players)

    # Double-check that these raise the argument-count error.  Expecting
    # TypeError rather than any Exception keeps an unrelated failure (for
    # example a NameError) from satisfying the check.
    with pytest.raises(TypeError):
        bad_player.play(35, 0)
    with pytest.raises(TypeError):
        bad_player.update(id(bad_player), 35, 0, False)
    with pytest.raises(TypeError):
        bad_player.prepare_for_new_game([id(p) for p in players])

    # Test that the game captures the errors
    game.run()
def test_update_game_pass():
    """Verify the bookkeeping nothanks.Game performs when a player passes."""
    players = [nothanks.Player() for _ in range(3)]
    game = nothanks.Game(players)
    passer = next(game.player_cycler)
    passer_state = game.state[id(passer)]

    coins_before = passer_state['coins']
    card = game.deal_card()
    pot = 10
    outcome = game.update_game(passer, card, pot, False)
    next_player, next_card, next_pot = outcome

    # A pass costs the player one coin and grows the pot by one; the card
    # stays on offer and the turn moves on to a different player.
    assert card not in passer_state['cards']
    assert passer_state['coins'] == coins_before - 1
    assert next_player is not passer
    assert next_card == card
    assert next_pot == pot + 1

    # Passing without any coins must be rejected with an exception.
    passer_state['coins'] = 0
    with pytest.raises(Exception):
        game.update_game(passer, card, pot, False)
def compete(strategies, num_rounds=1000):
    """Run a No Thanks tournament between strategy modules.

    For each table size (3, 4 and 5 players), ``num_rounds`` games are
    played.  The line-up of every game is picked with
    ``choices(strategies, k=size)``; each picked name is loaded with
    ``import_module`` and its ``Player`` class is instantiated.

    Scoring per seat: a strategy loses ``1 / size / num_rounds`` for taking
    part and gains ``1 / len(winners) / num_rounds`` when its player is among
    the winners.

    Returns a DataFrame with one row per strategy and one column per table
    size, plus a 'combined' column (row sums) and a 'total' row (column
    sums).
    """
    table_sizes = [3, 4, 5]

    # Every strategy starts from zero at every table size.
    scores = {size: {name: 0 for name in strategies} for size in table_sizes}

    for size in table_sizes:
        progress = ProgressBar('Playing {}-player games'.format(size))
        for _ in progress.iter(range(num_rounds)):
            lineup = choices(strategies, k=size)
            players = [import_module(name).Player() for name in lineup]
            winners, _ = nothanks.Game(players).run()

            tally = scores[size]
            for name, player in zip(lineup, players):
                tally[name] -= 1 / size / num_rounds
                if id(player) in winners:
                    tally[name] += 1 / len(winners) / num_rounds

    table = pd.DataFrame(scores)
    table['combined'] = table.sum(axis=1)
    table.loc['total', :] = table.sum(axis=0)
    return table
def test_player_action():
    """Check that a player holding no coins is forced to take the card."""
    players = [nothanks.Player() for _ in range(3)]
    game = nothanks.Game(players)
    broke_player = next(game.player_cycler)
    broke_state = game.state[id(broke_player)]

    card = game.deal_card()
    pot = 0

    # With zero coins there is nothing to pay for a pass, so the action
    # reported by the game must be "take".
    broke_state['coins'] = 0
    assert game.player_action(broke_player, card, pot)
def test_scoring():
    """Exercise nothanks.Game.get_scores on hand-built player states."""
    player = nothanks.Player()
    game = nothanks.Game([player])
    pid = id(player)
    player_state = game.state[pid]

    # Each case is (cards held, coins held, expected score).  The expected
    # values show that coins are subtracted from the score and that only the
    # lowest card of each consecutive run is counted.
    cases = [
        (SortedSet(), 0, 0),
        (SortedSet(), 55, -55),
        (SortedSet(range(3, 36)), 0, 3),
        (SortedSet([10, 11, 12, 14, 16, 17]), 8, 10 + 14 + 16 - 8),
    ]
    for cards, coins, expected in cases:
        player_state['cards'] = cards
        player_state['coins'] = coins
        assert game.get_scores()[pid] == expected
def test_update_game_take():
    """Ensure the game state is updated correctly when a player takes a card.

    Taking must add the card and the whole pot to the player's state, leave
    the turn with the same player, put a different card on offer and reset
    the pot to zero.  Taking a card the player already holds must raise.
    """
    players = [nothanks.Player(), nothanks.Player(), nothanks.Player()]
    game = nothanks.Game(players)
    player = next(game.player_cycler)
    state = game.state[id(player)]

    # When a player takes a card and pot:
    #   See that card and pot are added to player state
    #   And that the same player goes again
    #   And that a new card is dealt
    prev_coins = state['coins']
    card = game.deal_card()
    pot = 10
    new_player, new_card, new_pot = game.update_game(player, card, pot, True)
    assert card in state['cards']
    assert state['coins'] == prev_coins + pot
    assert new_player is player
    # Cards are integers, so compare by value: an identity check on ints only
    # behaves like a value check thanks to CPython's small-integer caching.
    assert new_card != card
    assert new_pot == 0

    # Trying to take the same card again should raise an exception.
    with pytest.raises(Exception):
        game.update_game(player, card, pot, True)
def train(self, num_games, print_progress=True):
    """Train the shared game tree through self-play.

    Plays ``num_games`` games between ``self.num_players`` identically
    configured players that all reference this object.  After every game,
    each decision recorded in a player's ``history`` is revisited: the regret
    for the action that was NOT chosen is accumulated on the tree node, and
    the take/pass edge weights (current and running average) are recomputed
    from the cumulative regrets.

    Args:
        num_games: Number of self-play games to simulate.
        print_progress: When true, iterate through a ``ProgressBar`` labelled
            'Training'; otherwise iterate silently.
    """
    # Game settings shared by the players and by every simulated game.
    rules = dict(starting_coins=self.starting_coins,
                 low_card=self.low_card,
                 high_card=self.high_card,
                 discard=self.discard)

    # Create a set of identical players, each referencing this game tree
    players = [
        Player(self, num_players=self.num_players, **rules)
        for _ in range(self.num_players)
    ]

    # Simulate self-play
    if print_progress:
        iterable = ProgressBar('Training').iter(range(num_games))
    else:
        iterable = range(num_games)

    for _ in iterable:
        winners, _ = nothanks.Game(players, **rules).run()

        # Update the shared game tree based on game results.
        for player in players:
            # Every player pays 1/num_players; the winners split one unit.
            payoff = -1 / self.num_players
            if id(player) in winners:
                payoff += 1 / len(winners)

            for state_hash, action in player.history.items():
                node = self.tree.nodes[state_hash]
                state = node['state']

                # If the player has no coins, there is no decision to make.
                if not node['can_pass']:
                    continue

                # Locate the two outgoing edges by applying each action to a
                # copy of the state and hashing the resulting successor.
                take_state = deepcopy(state)
                take_state.take()
                take_edge = self.tree.edges[(state_hash,
                                             take_state.prehash())]

                pass_state = deepcopy(state)
                pass_state.pass_turn()
                pass_edge = self.tree.edges[(state_hash,
                                             pass_state.prehash())]

                # If the expected return from the alternate decision is
                # higher than the return from this game, the player regrets
                # not having chosen the alternate decision.
                if action:  # took card and pot when in this state
                    alt_payoff = self.get_expected_payoff_pass(state)
                else:  # passed when in this state
                    alt_payoff = self.get_expected_payoff_take(state)
                regret = alt_payoff - payoff

                # Record first-time visits in the debug log.
                if node['visits'] == 0:
                    logger.debug(
                        'LOG: State {} was visited for the first time this game.'
                        .format(state_hash))

                # Then update node and edge values
                node['visits'] += 1
                visits = node['visits']

                # Add new regret for the action we DIDN'T take.  Regrets are
                # indexed by the action flag: True for take, False for pass.
                node['regret'][not action] += regret

                # Update strategy based on new cumulative regret; only
                # positive regret contributes.
                effective_regret = [max(0, r) for r in node['regret']]
                total_regret = sum(effective_regret)
                if total_regret == 0:
                    take_edge['weight'] = 1 / 2
                    pass_edge['weight'] = 1 / 2
                else:
                    take_edge['weight'] = effective_regret[True] / total_regret
                    pass_edge['weight'] = effective_regret[False] / total_regret

                # Adjust running average strategy
                take_edge['avg_weight'] *= (visits - 1) / visits
                take_edge['avg_weight'] += take_edge['weight'] / visits
                pass_edge['avg_weight'] *= (visits - 1) / visits
                pass_edge['avg_weight'] += pass_edge['weight'] / visits