def test_game_end(self):
    game = gm.GameState()
    features = fe.state2feature(game)
    start_player = np.sum(features[0, 15, :])
    player = RandomPolicy()
    # Play the game until the end
    while not game.is_end_of_game():
        (card, move) = player.get_action(game)
        game.play_round(card, *move)
        features = fe.state2feature(game)
    # At the end 9 cards should fill up the board
    self.assertTrue((np.sum(features[0, 4:13, :], axis=1) == [1, 1, 1, 1, 1, 1, 1, 1, 1]).all())
    # At the end the winner should own more cards
    if game.get_winner() == gm.LEFT_PLAYER:
        self.assertTrue(np.sum(features[0, 14, :]) > gm.START_HANDS)
    elif game.get_winner() == gm.RIGHT_PLAYER:
        self.assertTrue(np.sum(features[0, 14, :]) < gm.START_HANDS)
    else:
        self.assertTrue(np.sum(features[0, 14, :]) == gm.START_HANDS)
    # After an odd number of rounds the current-player plane should be the complement of the starting one
    self.assertTrue(np.sum(features[0, 15, :]) == features.shape[2] - start_player)
def test_get_action(self):
    player = NNPolicy()
    input = np.zeros((1, fe.get_feature_dim(player.features), 2 * gm.START_HANDS))
    game = gm.GameState()
    while not game.is_end_of_game():
        (card, move) = player.get_action(game)
        self.assertTrue(card.position == (-1, -1) and card.owner == game.current_player)
        self.assertTrue(game.board[Helper.tuple2idx(game.board_size, *move)] is None)
        game.play_round(card, *move)
def test_run_single_game(self):
    game = gm.GameState()
    (states, cards, moves) = su.simulate_single_game(self.target, game)
    self.assertTrue(np.array(states).shape == (gm.BOARD_SIZE ** 2, fe.get_feature_dim(), gm.START_HANDS * 2))
    self.assertTrue(np.array(cards).shape == (gm.BOARD_SIZE ** 2, gm.START_HANDS * 2))
    self.assertTrue(np.array(moves).shape == (gm.BOARD_SIZE ** 2, gm.BOARD_SIZE ** 2))
    sum_cards = np.sum(np.array(cards), 0)
    sum_moves = np.sum(np.array(moves), 0)
    self.assertTrue(np.all(sum_cards <= 1))
    self.assertTrue(np.all(sum_moves == 1))
def simulate_games(player, opponent, metadata):
    """
    Args:
        player: a policy for the player side
        opponent: another policy for the opponent side
        metadata: a dictionary which contains the metadata for this training process

    Returns:
        states: a list with n elements, where n is the number of games per batch
            (game_batch in metadata). Each element is another list with m elements,
            where m is the number of moves made in that game by the player (we only
            train on the player's actions, not the opponent's). Each element of the
            inner list is the game feature, an ndarray of shape 1 x DIM x 10, where
            DIM is the dimension of all selected features for each card.
        card_actions: like states, a list of lists per game; each element of the inner
            list is a one-hot vector for the card pick (a 1 x n ndarray with
            n = 2 * START_HANDS, where the 1 marks the card to pick).
        move_actions: like states, a list of lists per game; each element of the inner
            list is a one-hot vector for the move (a 1 x n ndarray with
            n = BOARD_SIZE ** 2, where the 1 marks the board grid on which to place
            the picked card).
        rewards: a list with one element per game, the value returned by get_winner()
            for that game (win, tie, or loss from the learner's point of view).
    """
    states = [[] for _ in range(metadata["game_batch"])]  # Features from the game state, by default a 16 x 10 array per step
    card_actions = [[] for _ in range(metadata["game_batch"])]  # One-hot vectors over the 10 cards, from left to right
    move_actions = [[] for _ in range(metadata["game_batch"])]  # One-hot vectors over the 9 possible moves
    rewards = [0 for _ in range(metadata["game_batch"])]  # Whether the player has won (1), tied (0), or lost (-1)
    # The learner is always the left player, and the opponent picked from the pool is always the right player.
    # The game starts with either the left or the right player by a 50/50 chance.
    card_pool = gm.GameState.load_cards_from_file(metadata["card_path"], metadata["card_file"])
    for i in range(metadata["game_batch"]):
        default_cards = random.sample(card_pool, gm.START_HANDS)
        left_cards = [card.clone() for card in default_cards]
        right_cards = [card.clone() for card in default_cards]
        new_game = gm.GameState(left_cards=left_cards, right_cards=right_cards)
        while not new_game.is_end_of_game():
            if new_game.current_player == gm.LEFT_PLAYER:
                # Record all the moves made by the learner
                (card, move) = player.get_action(new_game)
                states[i].append(fe.state2feature(new_game))
                (card_vector, move_vector) = player.action_to_vector(new_game, card, move)
                card_actions[i].append(np.expand_dims(card_vector, axis=0))
                move_actions[i].append(np.expand_dims(move_vector, axis=0))
            else:
                (card, move) = opponent.get_action(new_game)
            new_game.play_round(card, *move)
        rewards[i] = new_game.get_winner()  # Loss and tie can be treated the same downstream since we only want to win
    return (states, card_actions, move_actions, rewards)
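# A minimal usage sketch (illustrative only, not part of the training loop). It assumes the
# metadata keys read by simulate_games above, that NNPolicy and BaselinePolicy are importable
# here, and that the card file from the evaluation defaults exists; the concrete values are
# assumptions.
def _example_simulate_games():
    metadata = {"game_batch": 8, "card_path": "test_cards", "card_file": "cards.csv"}
    states, card_actions, move_actions, rewards = simulate_games(NNPolicy(), BaselinePolicy(), metadata)
    # One inner list per game, with one entry per move made by the learner in that game
    assert len(states) == metadata["game_batch"]
    assert len(rewards) == metadata["game_batch"]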
def state_action_generator(target_policy, metadata):
    """
    Args:
        target_policy: the policy the NNPolicy should learn to imitate. We use the manually
            crafted policy here.
        metadata: a dictionary which contains the metadata for this training process

    Yields:
        A tuple (states, targets):
        states: an ndarray with shape (n, dim, 10), where n is batch_size * 9 (each game
            takes 9 steps), dim is the dimension of all selected features for each card,
            and 10 covers every card in both hands.
        targets: a dict with two entries. "card_output" is an ndarray with shape (n, 10)
            of one-hot vectors specifying which card to pick; "move_output" is an ndarray
            with shape (n, 9) of one-hot vectors specifying which board position to play
            the picked card on.
    """
    left_card_file = gm.GameState.load_cards_from_file(metadata["card_path"], metadata["card_file"])
    right_card_file = gm.GameState.load_cards_from_file(metadata["card_path"], metadata["card_file"])
    while True:
        all_states = []
        all_cards = []
        all_moves = []
        for idx in range(metadata["batch_size"]):
            left_cards = random.sample(left_card_file, gm.START_HANDS)
            right_cards = random.sample(right_card_file, gm.START_HANDS)
            new_game = gm.GameState(left_cards=left_cards, right_cards=right_cards)
            (states, cards, moves) = simulate_single_game(target_policy, new_game)
            all_states.append(states)
            all_cards.append(cards)
            all_moves.append(moves)
        np_states = np.array(all_states)  # shape: batch_size x steps_per_game x feature_dim x 10; the first two dims are merged below
        np_cards = np.array(all_cards)    # shape: batch_size x steps_per_game x 10; the first two dims are merged below
        np_moves = np.array(all_moves)    # shape: batch_size x steps_per_game x 9; the first two dims are merged below
        yield (np_states.reshape((-1,) + np_states.shape[2:]),
               {"card_output": np_cards.reshape((-1,) + np_cards.shape[2:]),
                "move_output": np_moves.reshape((-1,) + np_moves.shape[2:])})
        del all_states
        del all_cards
        del all_moves
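# A minimal sketch of how the generator could be consumed (illustrative only): the metadata
# values and the use of BaselinePolicy as the target are assumptions; the shape checks follow
# from the reshapes above.
def _example_state_action_generator():
    metadata = {"batch_size": 4, "card_path": "test_cards", "card_file": "cards.csv"}
    generator = state_action_generator(BaselinePolicy(), metadata)
    states, targets = next(generator)
    # Each game contributes 9 steps, so the leading dimension is batch_size * 9
    assert states.shape[0] == metadata["batch_size"] * gm.BOARD_SIZE ** 2
    assert targets["card_output"].shape == (states.shape[0], 2 * gm.START_HANDS)
    assert targets["move_output"].shape == (states.shape[0], gm.BOARD_SIZE ** 2)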
def test_action_to_vector(self):
    player = BaselinePolicy()
    cards = []
    moves = []
    game = gm.GameState()
    while not game.is_end_of_game():
        (card, move) = player.get_action(game)
        (card_vector, move_vector) = player.action_to_vector(game, card, move)
        self.assertTrue(sum(card_vector) == 1 and sum(move_vector) == 1)
        cards.append(card_vector)
        moves.append(move_vector)
        game.play_round(card, *move)
    sum_cards = np.sum(np.array(cards), 0)
    sum_moves = np.sum(np.array(moves), 0)
    self.assertTrue(np.all(sum_cards <= 1))
    self.assertTrue(np.all(sum_moves == 1))
def test_game_start(self):
    game = gm.GameState()
    self.assertTrue(fe.get_feature_dim() == 16)
    features = fe.state2feature(game)
    self.assertTrue(features.shape[0] == 1)
    self.assertTrue(features.shape[1] == 16)
    self.assertTrue(features.shape[2] == 2 * gm.START_HANDS)
    # At the beginning all the cards are in the hands
    self.assertTrue((np.sum(features[0, 4:14, :], axis=1) == [0, 0, 0, 0, 0, 0, 0, 0, 0, 10]).all())
    # At the beginning the cards are equally owned by both players
    self.assertTrue(np.sum(features[0, 14, :]) == gm.START_HANDS)
    # The current-player plane should be either 1 for all the cards or 0 for all the cards
    start_player = np.sum(features[0, 15, :])
    self.assertTrue(start_player == features.shape[2] or start_player == 0)
def compare_policy(player, opponent, num_games, card_file_path="test_cards", card_file_name="cards.csv"):
    default_left_cards = gm.GameState.load_cards_from_file(card_file_path, card_file_name)
    default_right_cards = gm.GameState.load_cards_from_file(card_file_path, card_file_name)
    winner = []
    for i in range(num_games):
        left_cards = random.sample(default_left_cards, 5)
        right_cards = random.sample(default_right_cards, 5)
        for card in left_cards + right_cards:
            card.reset()
        # The player is always on the left and the opponent on the right; the game randomly picks who starts.
        game = gm.GameState(left_cards=left_cards, right_cards=right_cards)
        while not game.is_end_of_game():
            if game.current_player == gm.LEFT_PLAYER:
                (card, move) = player.get_action(game)
            else:
                (card, move) = opponent.get_action(game)
            game.play_round(card, *move)
        winner.append(game.get_winner())
        """
        if i % 10 == 0 and i > 0:
            won_games = sum(1 for _ in filter(lambda x: x == gm.LEFT_PLAYER, winner))
            tie_games = sum(1 for _ in filter(lambda x: x == gm.NO_ONE, winner))
            lost_games = sum(1 for _ in filter(lambda x: x == gm.RIGHT_PLAYER, winner))
            print("This is the {}th game, current win rate: {}, tie rate: {}, lose rate: {}".format(
                i, round(won_games / i, 2), round(tie_games / i, 2), round(lost_games / i, 2)), end='\r')
        """
    won_games = sum(1 for _ in filter(lambda x: x == gm.LEFT_PLAYER, winner))
    tie_games = sum(1 for _ in filter(lambda x: x == gm.NO_ONE, winner))
    lost_games = sum(1 for _ in filter(lambda x: x == gm.RIGHT_PLAYER, winner))
    print("Evaluation done. Player won {} games, tied {} games, and lost {} games".format(
        won_games, tie_games, lost_games))
    return round(won_games / num_games, 2)
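# A minimal sketch for running a head-to-head evaluation directly (illustrative only): it
# assumes NNPolicy and RandomPolicy are imported in this module and that 100 games is an
# acceptable sample size.
if __name__ == "__main__":
    win_rate = compare_policy(NNPolicy(), RandomPolicy(), num_games=100)
    print("Left player win rate over 100 games: {}".format(win_rate))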