def test_after_game_players_have_no_cards():
    """Check that both players' hands are empty after the single round is played."""
    bob = Player("bob")
    sharon = Player("sharon")
    game = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=1)

    game.play_round()

    for player in (bob, sharon):
        assert len(player.hand) == 0
def test_after_turn_hands_exchange_two_player():
    """Check that hands are passed between the two players after one turn.

    After a single ``play_turn()`` each player must hold 9 of the original
    10 cards, and every card now in a player's hand must have been in the
    other player's hand before the turn.
    """
    p1 = Player("bob")
    p2 = Player("sharon")
    game = Game(deck=StandardDeck(), agents=[p1, p2], cards_per_player=10)

    # Snapshot the hands by value. Holding only a reference to ``p1.hand`` /
    # ``p2.hand`` would let an in-place update (or a swap of the hand objects)
    # during the turn alter the "before" state and make the membership checks
    # below trivially true.
    p1_hand_before, p2_hand_before = list(p1.hand), list(p2.hand)

    game.play_turn()

    assert len(p1.hand) == 9
    assert len(p2.hand) == 9
    assert all(card in p2_hand_before for card in p1.hand)
    assert all(card in p1_hand_before for card in p2.hand)
def simple_egg_score_test():
    """Play two rounds on a deck created from egg nigiri only and inspect the log.

    Asserts that the game log has 42 rows and that the ``reward`` entries
    labelled 2 and 3 are zero.

    NOTE(review): the name does not start with ``test``, so pytest's default
    discovery rules would not collect this function - confirm whether that is
    intentional.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    egg_deck = Deck.create([NigiriCard('egg')], [1000])
    game = Game(deck=egg_deck, agents=[bob, sharon], cards_per_player=10)

    for _ in range(2):
        game.play_round()

    print(game.gamelog)
    assert game.gamelog.shape[0] == 42
    for row in (2, 3):
        assert game.gamelog['reward'][row] == 0.0
def test_certain_cards_carry_rewards_at_end_of_round():
    """Play two rounds on a maki-only deck and inspect the game log.

    Asserts that the log has 42 rows and that the ``reward`` entries labelled
    0 and 1 are zero.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    # Build a deck with no cards that are worth points during a round.
    maki_deck = Deck.create([MakiCard(3)], [100])
    game = Game(deck=maki_deck, agents=[bob, sharon], cards_per_player=10)

    for _ in range(2):
        game.play_round()

    print(game.gamelog)
    assert game.gamelog.shape[0] == 42
    for row in (0, 1):
        assert game.gamelog['reward'][row] == 0.0
def test_after_turn_hands_exchange_three_player():
    """Check that hands rotate between three players after one turn.

    After a single ``play_turn()`` each player must hold 7 of the original
    8 cards. The expected rotation is: bob receives from alice, alice
    receives from sharon, and sharon receives from bob.
    """
    p1 = Player("bob")
    p2 = Player("sharon")
    p3 = Player("alice")
    game = Game(deck=StandardDeck(), agents=[p1, p2, p3], cards_per_player=8)

    # Snapshot the hands by value. Keeping bare references to the players'
    # ``hand`` attributes would let an in-place update (or a swap of the hand
    # objects) during the turn alter the "before" state and make the
    # membership checks below trivially true.
    p1_hand_before = list(p1.hand)
    p2_hand_before = list(p2.hand)
    p3_hand_before = list(p3.hand)

    game.play_turn()

    assert len(p1.hand) == 7
    assert len(p2.hand) == 7
    assert len(p3.hand) == 7
    assert all(card in p3_hand_before for card in p1.hand)
    assert all(card in p2_hand_before for card in p3.hand)
    assert all(card in p1_hand_before for card in p2.hand)
class Environment():
    # Wrapper that owns a ``Game`` and exposes ``reset``, ``action_space`` and
    # ``step`` methods on top of it.
    #
    # NOTE(review): ``__init__`` registers the agent "foo" with the game, while
    # ``reset`` and ``action_space`` query the game for "env-player" - confirm
    # that these two identifiers are meant to differ.

    def __init__(self, name, opponents):
        """Store ``name`` and build a ``Game`` with the user agent followed by ``opponents``.

        ``opponents`` is concatenated onto a list, so it is presumably a list
        of agents - TODO confirm against callers.
        """
        self.name = name
        self.user_agent = "foo"
        self.game = Game(agents=[self.user_agent] + opponents)

    def reset(self):
        """Reset the underlying game and return its observation for "env-player"."""
        self.game.reset_game()
        return self.game.get_observation("env-player")

    def action_space(self):
        """Return the game's action space for "env-player"."""
        return self.game.get_action_space("env-player")

    def step(self, action):
        # NOTE(review): the body of ``step`` is not visible in this excerpt.
def test_interal_game_reset_functionality():
    """Check the turn counter across two rounds and the state after a reset.

    The turn counter is expected to read 11 after the first round and 21
    after the second. After ``reset_game()`` it must be back at 0 and both
    players must hold 10 cards again.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    game = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=2, cards_per_player=10)

    for expected_turn in (11, 21):
        game.play_round()
        assert game.turn == expected_turn

    game.reset_game()
    assert game.turn == 0
    for player in (bob, sharon):
        assert len(player.hand) == 10


# Disabled test, kept for reference.
# def test_simulated_games_should_be_distinct():
#     p1 = Player("bob")
#     p2 = Player("sharon")
#     game = Game(deck_constructor=StandardDeck, agents=[p1, p2], n_rounds=2, cards_per_player=10)
#     result1 = game.simulate_game()
#     result2 = game.simulate_game()
#     result3 = game.simulate_game()
#     result4 = game.simulate_game()
#     result5 = game.simulate_game()
#     print(result5[['action', 'player', 'round', 'reward']])
#     assert result5['bob'] < 100
#     assert result5['sharon'] < 100
def test_simple_one_winner_one_round():
    """Play one round on an egg/salmon nigiri deck and check the final rewards.

    The deck is created from egg and salmon nigiri with the counts ``[19, 1]``.
    After the round the log must have 22 rows, and the last logged reward of
    each player must be either 10 or 11.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    deck = Deck.create([NigiriCard('egg'), NigiriCard('salmon')], [19, 1])
    game = Game(deck=deck, agents=[bob, sharon], cards_per_player=10, n_rounds=1)
    game.play_round()

    def last_reward(player_name):
        # Last ``reward`` value among the log rows belonging to ``player_name``.
        rows = game.gamelog[game.gamelog['player'] == player_name]
        return rows['reward'].iloc[-1]

    bob_final_reward = last_reward('bob')
    sharon_final_reward = last_reward('sharon')

    print(game.gamelog)
    assert game.gamelog.shape[0] == 22
    assert bob_final_reward in (10., 11.)
    assert sharon_final_reward in (10., 11.)
def test_reward_in_log_needs_to_accumulate():
    """Check that each player's logged reward never decreases over a game.

    A full two-round game is simulated on a standard deck. The log is sorted
    by player and turn, and for both players every step-to-step difference in
    ``reward`` (the first entry compared against 0) must be non-negative.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    deck = StandardDeck()
    # Alternative deck, currently disabled:
    # d = Deck(egg=15, salmon=15, squid=15, tempura=15,
    #          sashimi=15, dumpling=15, pudding=0,
    #          wasabi=15, maki1=10, maki2=10, maki3=10)
    game = Game(deck=deck, agents=[bob, sharon], cards_per_player=10, n_rounds=2)
    game.simulate_game()

    log = game.gamelog.sort_values(["player", "turn"])
    # NOTE(review): "alice" is not one of this game's agents, so that
    # iteration presumably prints an empty frame - confirm it is wanted.
    for name in ["bob", "sharon", "alice"]:
        print(log[log['player'] == name])

    bob_rewards = log[log['player'] == 'bob']['reward']
    sharon_rewards = log[log['player'] == 'sharon']['reward']
    print(game.scores)

    for rewards in (bob_rewards, sharon_rewards):
        gains = rewards - rewards.shift().fillna(0)
        assert all(gain >= 0 for gain in gains)
def test_game_assigns_correct_number_of_cards_3player():
    """Check that a new three-player game registers 3 players holding 8 cards each."""
    names = ("bob", "sharon", "alice")
    agents = [Player(name) for name in names]
    game = Game(deck=StandardDeck(), agents=agents, cards_per_player=8)

    assert len(game.players.keys()) == 3
    assert isinstance(game.deck, Deck)
    for name in names:
        assert len(game.players[name].hand) == 8
def test_game_contains_players_cards_and_deck_2player():
    """Check that a new two-player game has 2 players, a ``Deck`` and 10-card hands."""
    names = ("bob", "sharon")
    agents = [Player(name) for name in names]
    game = Game(deck=StandardDeck(), agents=agents, cards_per_player=10)

    assert len(game.players.keys()) == 2
    assert isinstance(game.deck, Deck)
    print(game.players)
    print(game.players["bob"])
    for name in names:
        assert len(game.players[name].hand) == 10
def test_after_turn_hands_exchange_two_player():
    """Check the end results before play and after each of two rounds.

    bob's result must be 0.0 before any round is played and positive after
    the first and second rounds; sharon's must be positive after the second.

    NOTE(review): the function name mentions a hand exchange, but the body
    only checks scores - consider whether the name is still accurate.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    game = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=2)
    print(game.scores)

    # One snapshot before play, then one after each round.
    snapshots = [game.end_results()]
    for _ in range(2):
        game.play_round()
        snapshots.append(game.end_results())
    before_play, after_first, after_second = snapshots

    print(after_first)
    print(after_second)
    assert before_play['bob'] == 0.0
    assert after_first['bob'] > 0.0
    assert after_second['bob'] > 0.0
    assert after_second['sharon'] > 0.0
def test_certain_cards_carry_no_rewards_within_rounds():
    """Check that logged rewards stay at zero while turns are played.

    The deck is created from pudding, wasabi and maki cards. After the first
    turn the log must have 4 rows with the ``reward`` entries labelled 0 and 1
    equal to zero; after three more turns the last logged reward must still
    be zero.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    # Build a deck with no cards that are worth points during a round.
    deck = Deck.create([PuddingCard(), WasabiCard(), MakiCard(3)], [8, 4, 4 * 7])
    game = Game(deck=deck, agents=[bob, sharon], cards_per_player=5)

    game.play_turn()
    assert game.gamelog.shape[0] == 4
    for row in (0, 1):
        assert game.gamelog['reward'][row] == 0.0

    for _ in range(3):
        game.play_turn()
    assert game.gamelog['reward'].iloc[-1] == 0.0
def test_turns_update():
    """Check that the turn counter reads 11 after one round and 21 after two."""
    bob = Player("bob")
    sharon = Player("sharon")
    game = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=2)

    for expected_turn in (11, 21):
        game.play_round()
        assert game.turn == expected_turn
def test_after_turn_hands_exchange_three_player():
    """Check that end results grow over a three-round, three-player game.

    End results are captured before play and after each round. The results
    after the third round must exceed those after the first round for bob and
    sharon, and must exceed the pre-game results for alice.

    NOTE(review): the function name mentions a hand exchange, but the body
    only checks scores - consider whether the name is still accurate.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    alice = Player("alice")
    game = Game(deck=StandardInfiniDeck(), agents=[bob, sharon, alice], n_rounds=3)

    # One snapshot before play, then one after each round.
    snapshots = [game.end_results()]
    for _ in range(3):
        game.play_round()
        snapshots.append(game.end_results())
    before_play, after_first, _after_second, after_third = snapshots

    assert after_third['bob'] > after_first['bob']
    assert after_third['alice'] > before_play['alice']
    # NOTE(review): this repeats the first assertion and the second-round
    # snapshot is never checked - possibly a different comparison was intended.
    assert after_third['bob'] > after_first['bob']
    assert after_third['sharon'] > after_first['sharon']
def __init__(self, name, opponents):
    """Store ``name`` and create a ``Game`` with the user agent followed by ``opponents``."""
    self.name = name
    self.user_agent = "foo"
    # The user agent goes first in the agent list handed to the game.
    agents = [self.user_agent] + opponents
    self.game = Game(agents=agents)
# Train a policy-gradient player against a simple weighted player for 100
# games, printing an exponentially weighted moving average of the win rate.

# Set up policy.
# Seed BEFORE the policy is constructed, so that whatever random
# initialisation the policy performs with torch's RNG is reproducible too.
torch.manual_seed(123)
policy = Policy('LSTM', 22, 20, 1, 11)

# Parameters
gamma = 0.99

# Set up optim
lr = 1e-2
optimizer = optim.Adam(policy.parameters(), lr=lr)
log_interval = 10

# Play games
deck = StandardDeck()
# Number of distinct card kinds, identified by their string form.
N_cards = len({str(card) for card in deck})
p1 = Pg_player(policy=policy, name="PG_player01")
# The simple opponent gets the same weight for every card kind.
p2 = Simple_player(weights=[1 / N_cards] * N_cards, name="SIMPLE_player01")

ewma = 0.5    # initial value of the moving-average win ratio
alpha = 0.95  # weight kept from the previous average on each update
for n in range(100):
    game = Game([p1, p2], verbose=False)
    game.simulate_game()
    win = game.did_player_win(p1.name)
    ewma = alpha * ewma + (1 - alpha) * int(win)
    print('At %3i ewma win ratio %5.3f' % (n, ewma))
    finish_game(policy, gamma=gamma, optimizer=optimizer)
    # Clear the player's stored previous reward before the next game.
    p1.prev_reward = None
    optimizer = adjust_learning_rate(optimizer, n, lr, 30)
def test_reward_in_log_needs_to_accumulate():
    """Set up a two-agent game with ``n_rounds=2`` on a deck created from ``DumplingCard`` only.

    NOTE(review): no game play or assertions are visible in this excerpt -
    the rest of the test may have been cut off; confirm against the full file.
    """
    p1 = Player("bob")
    p2 = Player("sharon")
    # Deck created from a single card type with the count list [1000].
    d = Deck.create([DumplingCard()], [1000])
    game = Game(deck=d, agents=[p1, p2], n_rounds=2)