def test_interal_game_reset_functionality():
    """Check turn counting over two rounds and the state after reset_game().

    The assertions expect the turn counter to read 11 and 21 after the first
    and second round, and after ``reset_game()`` a counter of 0 with ten cards
    in each player's hand.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    game = Game(
        deck=StandardDeck(),
        agents=[bob, sharon],
        n_rounds=2,
        cards_per_player=10,
    )

    for expected_turn in (11, 21):
        game.play_round()
        assert game.turn == expected_turn

    game.reset_game()
    assert game.turn == 0
    for player in (bob, sharon):
        assert len(player.hand) == 10


# Disabled test, kept for reference:
#
# def test_simulated_games_should_be_distinct():
#     p1 = Player("bob")
#     p2 = Player("sharon")
#     game = Game(deck_constructor=StandardDeck, agents=[p1, p2], n_rounds=2, cards_per_player=10)
#     result1 = game.simulate_game()
#     result2 = game.simulate_game()
#     result3 = game.simulate_game()
#     result4 = game.simulate_game()
#     result5 = game.simulate_game()
#     print(result5[['action', 'player', 'round', 'reward']])
#     assert result5['bob'] < 100
#     assert result5['sharon'] < 100
def test_after_game_players_have_no_cards():
    """After playing the only round of a one-round game, both hands are empty."""
    bob, sharon = Player("bob"), Player("sharon")
    game = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=1)

    game.play_round()

    for player in (bob, sharon):
        assert len(player.hand) == 0
def test_turns_update():
    """The turn counter is expected to read 11, then 21, after successive rounds."""
    bob, sharon = Player("bob"), Player("sharon")
    game = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=2)

    for expected_turn in (11, 21):
        game.play_round()
        assert game.turn == expected_turn
def test_standard_deck():
    """Check that StandardDeck contains every expected card type and no others."""
    deck = StandardDeck()
    card_types = (
        MakiCard,
        PuddingCard,
        NigiriCard,
        TempuraCard,
        SashimiCard,
        DumplingCard,
        WasabiCard,
    )

    # Every expected type shows up at least once in the deck ...
    for card_type in card_types:
        assert any(isinstance(card, card_type) for card in deck.cards)

    # ... and every card in the deck is an instance of one of those types.
    # isinstance() accepts a tuple of classes, so no inner loop is needed.
    for card in deck.cards:
        assert isinstance(card, card_types)
def __init__(self, weights, name="simple_player", deck=None):
    """Initialise a player that associates one weight with each distinct card.

    Args:
        weights: sequence of per-card weights. They are paired positionally
            with ``self.cards``; ``zip`` stops at the shorter of the two, so
            surplus weights or cards are silently left unpaired.
        name: name stored on the player.
        deck: iterable of cards used to derive the distinct card labels.
            A falsy value (including ``None``) falls back to
            ``StandardDeck()``.
    """
    super(Simple_player, self).__init__()
    # Store the name: it used to be accepted and then silently discarded,
    # so callers could not tell players apart by the name they passed in.
    self.name = name
    self.deck = deck if deck else StandardDeck()

    # Distinct string labels of the cards in the deck. They are sorted because
    # the iteration order of a set of strings changes between interpreter
    # runs (hash randomisation), which would pair the positional `weights`
    # with different cards on every run.
    self.cards = sorted({str(card) for card in self.deck})
    self.num_cards = len(self.cards)
    self.cards_idx = {card: idx for idx, card in enumerate(self.cards)}
    self.weights = dict(zip(self.cards, weights))
    # NOTE(review): N_cards counts the supplied weights, num_cards the
    # distinct labels; the two can differ if `weights` has the wrong length.
    self.N_cards = len(weights)
def test_game_contains_players_cards_and_deck_2player():
    """A new two-player game exposes both players, a Deck and ten-card hands."""
    bob = Player("bob")
    sharon = Player("sharon")
    game = Game(
        deck=StandardDeck(),
        agents=[bob, sharon],
        cards_per_player=10,
    )

    assert len(game.players.keys()) == 2
    assert isinstance(game.deck, Deck)

    # Debug output, shown by pytest only on failure or with -s.
    print(game.players)
    print(game.players["bob"])

    for name in ("bob", "sharon"):
        assert len(game.players[name].hand) == 10
def test_after_turn_hands_exchange_two_player():
    """After one turn each player holds nine cards taken from the other's hand."""
    bob = Player("bob")
    sharon = Player("sharon")
    game = Game(deck=StandardDeck(), agents=[bob, sharon], cards_per_player=10)

    # Take copies, not references: if play_turn() mutates or swaps the hand
    # lists in place, an aliased "before" hand would change along with the
    # live hand and the membership checks below would prove nothing.
    bob_before, sharon_before = list(bob.hand), list(sharon.hand)

    game.play_turn()

    assert len(bob.hand) == 9
    assert len(sharon.hand) == 9
    assert all(card in sharon_before for card in bob.hand)
    assert all(card in bob_before for card in sharon.hand)
def test_game_assigns_correct_number_of_cards_3player():
    """A new three-player game registers three players with eight cards each."""
    names = ("bob", "sharon", "alice")
    agents = [Player(name) for name in names]
    game = Game(deck=StandardDeck(), agents=agents, cards_per_player=8)

    assert len(game.players.keys()) == 3
    assert isinstance(game.deck, Deck)
    for name in names:
        assert len(game.players[name].hand) == 8
def __init__(self, policy, name="simple_player", deck=None):
    """Initialise a player driven by ``policy``.

    Args:
        policy: policy object stored on the player as ``self.policy``.
        name: name stored on the player. The default is kept as
            ``"simple_player"`` for backward compatibility.
        deck: iterable of cards used to derive the distinct card labels.
            A falsy value (including ``None``) falls back to
            ``StandardDeck()``.
    """
    super(Pg_player, self).__init__()
    self.policy = policy
    self.name = name
    self.prev_reward = None
    self.deck = deck if deck else StandardDeck()

    # Distinct string labels of the cards in the deck. They are sorted because
    # the iteration order of a set of strings changes between interpreter
    # runs (hash randomisation); without a fixed order the label -> index
    # mapping in `cards_idx` would differ from run to run.
    self.cards = sorted({str(card) for card in self.deck})
    self.num_cards = len(self.cards)
    self.cards_idx = {card: idx for idx, card in enumerate(self.cards)}
def test_after_turn_hands_exchange_three_player():
    """After one turn each of three players holds seven cards from a neighbour.

    The expected passing direction, as encoded by the assertions, is
    bob -> sharon -> alice -> bob.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    alice = Player("alice")
    game = Game(
        deck=StandardDeck(),
        agents=[bob, sharon, alice],
        cards_per_player=8,
    )

    # Take copies, not references: if play_turn() mutates or swaps the hand
    # lists in place, an aliased "before" hand would change along with the
    # live hand and the membership checks below would prove nothing.
    bob_before = list(bob.hand)
    sharon_before = list(sharon.hand)
    alice_before = list(alice.hand)

    game.play_turn()

    for player in (bob, sharon, alice):
        assert len(player.hand) == 7

    assert all(card in alice_before for card in bob.hand)
    assert all(card in sharon_before for card in alice.hand)
    assert all(card in bob_before for card in sharon.hand)
def test_after_turn_hands_exchange_two_player():
    """Check end_results() before any round and after each of two rounds.

    NOTE(review): despite its name this test checks scores, not hand
    exchange. If it lives in the same module as the hand-exchange test of the
    same name, this later definition replaces that one and it never runs --
    confirm and consider renaming.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    game = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=2)
    print(game.scores)

    # Snapshot the results before play and after each round.
    results = [game.end_results()]
    for _ in range(2):
        game.play_round()
        results.append(game.end_results())
    before_play, after_round_one, after_round_two = results

    print(after_round_one)
    print(after_round_two)

    assert before_play['bob'] == 0.0
    assert after_round_one['bob'] > 0.0
    assert after_round_two['bob'] > 0.0
    assert after_round_two['sharon'] > 0.0
def test_reward_in_log_needs_to_accumulate():
    """Logged rewards per player must never decrease from one turn to the next."""
    bob = Player("bob")
    sharon = Player("sharon")
    deck = StandardDeck()
    # d = Deck(egg=15, salmon=15, squid=15, tempura=15,
    #          sashimi=15, dumpling=15, pudding=0,
    #          wasabi=15, maki1=10, maki2=10, maki3=10)
    game = Game(deck=deck, agents=[bob, sharon], cards_per_player=10, n_rounds=2)
    game.simulate_game()

    log = game.gamelog.sort_values(["player", "turn"])

    # Debug output. No "alice" agent is created in this test, so that filter
    # presumably selects no rows.
    for name in ["bob", "sharon", "alice"]:
        print(log[log['player'] == name])

    reward_series = [
        log[log['player'] == name]['reward'] for name in ('bob', 'sharon')
    ]
    print(game.scores)

    for rewards in reward_series:
        # Difference to the previous logged reward; the first entry is
        # compared against 0.
        gains = rewards - rewards.shift().fillna(0)
        assert all([gain >= 0 for gain in gains])
def __init__(self, agents, deck=None, cards_per_player=10, n_rounds=3,
             verbose=False):
    """Set up a game: validate the players, build the log and deal the hands.

    Args:
        agents: player objects; each must expose a unique ``name``.
        deck: deck to play with; a falsy value (including ``None``) falls
            back to ``StandardDeck()``.
        cards_per_player: number of cards dealt to each player.
        n_rounds: number of rounds, stored as ``self.max_rounds``.
        verbose: stored as ``self.verbose``.

    Raises:
        ValueError: if two agents share a name, or if the deck reports fewer
            cards left than ``cards_per_player * len(agents) * n_rounds``.
    """
    agent_names = [agent.name for agent in agents]
    if len(set(agent_names)) != len(agents):
        raise ValueError("two players in game have the same name")

    self.turn = 1
    self.round = 1
    self.verbose = verbose
    self.max_rounds = n_rounds
    self.cards_per_player = cards_per_player
    self.deck = deck if deck else StandardDeck()

    # The deck must be able to cover every hand of every round.
    cards_needed = self.cards_per_player * len(agents) * n_rounds
    if cards_needed > self.deck.cards_left:
        raise ValueError('Deck has not enough cards.')

    self.score = self.deck.scoring_function()
    self.players = {agent.name: agent for agent in agents}
    self.game_id = str(uuid.uuid4())[:6]

    # One initial log row per player; the scalar values are broadcast by
    # pandas across the list of player names.
    initial_log = {
        "game_id": self.game_id,
        "round": 0,
        "turn": 0,
        "player": list(self.players),
        "action": '',
        "reward": 0,
        "round_reward": 0
    }
    self.gamelog = pd.DataFrame(initial_log)

    # Score table with keys "round-0" .. "round-<n_rounds>", all zeroed.
    self.scores = {}
    for round_no in range(0, n_rounds + 1):
        self.scores["round-{}".format(round_no)] = {
            agent.name: 0.0 for agent in agents
        }

    # Deal the opening hands straight off the deck, in player order.
    for player in self.players.values():
        player.hand = list(islice(self.deck, self.cards_per_player))
from sushigo.game import Game

# Seed torch BEFORE the policy is built. The seed used to be set after
# Policy(...) was constructed, so any random initialisation done while
# building the policy was not covered by it.
torch.manual_seed(123)

# Set up policy
policy = Policy('LSTM', 22, 20, 1, 11)

# Parameters
gamma = 0.99

# Set up optim
lr = 1e-2
optimizer = optim.Adam(policy.parameters(), lr=lr)
log_interval = 10

# Play games
deck = StandardDeck()
# Number of distinct card labels; sizes the uniform weights of the opponent.
N_cards = len({str(card) for card in deck})
p1 = Pg_player(policy=policy, name="PG_player01")
p2 = Simple_player(weights=[1 / N_cards] * N_cards, name="SIMPLE_player01")

# Exponentially weighted moving average of p1's win indicator, started at 0.5.
ewma = 0.5
alpha = 0.95
for n in range(100):
    game = Game([p1, p2], verbose=False)
    game.simulate_game()
    win = game.did_player_win(p1.name)
    ewma = alpha * ewma + (1 - alpha) * int(win)
    print('At %3i ewma win ratio %5.3f' % (n, ewma))
    finish_game(policy, gamma=gamma, optimizer=optimizer)
    # Clear the stored reward before the next game starts.
    p1.prev_reward = None
def get_score(cards, end_game=False, end_round=False):
    """Score ``cards`` with the scoring function of a fresh ``StandardDeck``.

    ``end_game`` and ``end_round`` are passed through unchanged to the scorer.
    """
    score_cards = StandardDeck().scoring_function()
    return score_cards(cards, end_game=end_game, end_round=end_round)