def get_next_move(self, board: KalahBoard):
    """Choose the next move for the current player.

    Allowed moves are scanned from the highest bin index downwards. The
    first bin whose seed count equals its distance to the current player's
    house is returned (presumably because the last seed then lands in the
    house -- confirm against the board's sowing rules). If no bin
    qualifies, one of the moves reported by ``utils.get_best_moves`` is
    drawn with ``self.random``.
    """
    own_house = board.get_house_id(board.current_player())
    descending_moves = sorted(board.allowed_moves(), reverse=True)

    # Move 0 is a legitimate result, so the "nothing found" marker is None.
    exact_hit = next(
        (pit for pit in descending_moves
         if board.get_board()[pit] == own_house - pit),
        None,
    )
    if exact_hit is not None:
        return exact_hit

    fallback_moves = utils.get_best_moves(board)
    return self.random.choice(fallback_moves)
def test_randomagent(self):
    # A fresh 6x4 board: four seeds per bin, both houses empty.
    board = KalahBoard(6, 4)
    self.assertEqual(board.get_board(),
                     [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0])
    self.assertEqual(board.allowed_moves(), [0, 1, 2, 3, 4, 5])

    # With a fixed random seed the first three picks are known in advance.
    agent = RandomAgent(5)
    for expected_move in (4, 2, 5):
        self.assertEqual(agent.get_next_move(board), expected_move)
def get_best_moves(board: KalahBoard):
    """Return every allowed move that yields the highest score gain.

    Each allowed move is scored with ``_get_score_gain`` for the current
    player. All moves tied for the maximum gain are returned in the order
    ``board.allowed_moves()`` reports them. Gains below -1 are never
    selected, so the result is empty when there are no allowed moves or
    when every gain is below that floor.
    """
    lowest_accepted_gain = -1

    scored_moves = [
        (candidate, _get_score_gain(board, board.current_player(), candidate))
        for candidate in board.allowed_moves()
    ]
    top_gain = max(
        (gain for _, gain in scored_moves),
        default=lowest_accepted_gain,
    )
    if top_gain < lowest_accepted_gain:
        return []
    return [candidate for candidate, gain in scored_moves if gain == top_gain]
def test_default_board(self):
    # Each case: (bins, seeds) -> expected initial layout.
    expected_layouts = [
        ((6, 4), [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0]),
        ((9, 6), [6, 6, 6, 6, 6, 6, 6, 6, 6, 0,
                  6, 6, 6, 6, 6, 6, 6, 6, 6, 0]),
    ]
    for (bins, seeds), layout in expected_layouts:
        board = KalahBoard(bins, seeds)
        self.assertEqual(board.get_board(), layout)
def compare_agents(bins, seeds, n_games, agent1, agent2):
    """Play ``n_games`` games between two agents and tally the outcomes.

    ``agent1`` always moves as player 0 and ``agent2`` as player 1; both
    must provide ``get_next_move(board)``. A game is aborted once the
    repeat-invalid counter for one player exceeds 10. The counter starts
    at zero on a player's first invalid move and is not reset by valid
    moves. An aborted game counts as a win for the other player and is
    also recorded in the invalid-moves total.

    Returns:
        ``[wins_agent_one, wins_agent_two, draws, invalid_moves]``
    """
    invalid_limit = 10
    tally = {"one": 0, "two": 0, "draw": 0, "invalid": 0}

    games_played = 0
    while games_played < n_games:
        board = KalahBoard(bins, seeds)
        offender = None
        strikes = 0

        while not board.game_over():
            mover = agent1 if board.current_player() == 0 else agent2
            accepted = board.move(mover.get_next_move(board))
            if not accepted:
                if offender == board.current_player():
                    strikes += 1
                else:
                    # A different player misbehaved: restart the count.
                    strikes = 0
                    offender = board.current_player()
            if strikes > invalid_limit:
                break

        if strikes > invalid_limit:
            # Forfeit: the player who kept sending invalid moves loses.
            tally["invalid"] += 1
            tally["two" if offender == 0 else "one"] += 1
        else:
            final_score = board.score()
            if final_score[0] > final_score[1]:
                tally["one"] += 1
            elif final_score[0] < final_score[1]:
                tally["two"] += 1
            else:
                tally["draw"] += 1

        games_played += 1

    return [tally["one"], tally["two"], tally["draw"], tally["invalid"]]
def test_move_into_house_6_4(self):
    board = KalahBoard(6, 4)

    def expect_state(cells, player, score, moves):
        # The game is still running at every checkpoint of this test.
        self.assertEqual(board.get_board(), cells)
        self.assertEqual(board.current_player(), player)
        self.assertEqual(board.game_over(), False)
        self.assertEqual(board.score(), score)
        self.assertEqual(board.allowed_moves(), moves)

    expect_state([4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0],
                 0, [0, 0], [0, 1, 2, 3, 4, 5])

    # The last seed from bin 2 ends in player 0's house: same player again.
    self.assertEqual(board.move(2), True)
    expect_state([4, 4, 0, 5, 5, 5, 1, 4, 4, 4, 4, 4, 4, 0],
                 0, [1, 0], [0, 1, 3, 4, 5])

    # Bin 2 is empty now, so picking it again is rejected.
    self.assertEqual(board.move(2), False)

    self.assertEqual(board.move(1), True)
    expect_state([4, 0, 1, 6, 6, 6, 1, 4, 4, 4, 4, 4, 4, 0],
                 1, [1, 0], [7, 8, 9, 10, 11, 12])
def test_first_moves_6_4(self):
    board = KalahBoard(6, 4)

    def expect_turn(player, score, moves):
        # Checks everything except the raw bin layout.
        self.assertEqual(board.current_player(), player)
        self.assertEqual(board.game_over(), False)
        self.assertEqual(board.score(), score)
        self.assertEqual(board.allowed_moves(), moves)

    self.assertEqual(board.get_board(),
                     [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0])
    expect_turn(0, [0, 0], [0, 1, 2, 3, 4, 5])

    # Houses, opponent bins and out-of-range indices are all refused.
    for rejected_move in (6, 7, 13, 123):
        self.assertEqual(board.move(rejected_move), False)

    self.assertEqual(board.move(0), True)
    expect_turn(1, [0, 0], [7, 8, 9, 10, 11, 12])

    self.assertEqual(board.move(7), True)
    expect_turn(0, [0, 0], [1, 2, 3, 4, 5])
def test_get_house_id(self):
    # Each row: (bins, seeds, house index of player 0, house index of player 1)
    cases = [
        (2, 2, 2, 5),
        (4, 2, 4, 9),
        (4, 4, 4, 9),
        (6, 4, 6, 13),
        (6, 6, 6, 13),
    ]
    for bins, seeds, house_first, house_second in cases:
        board = KalahBoard(bins, seeds)
        self.assertEqual(board.get_house_id(0), house_first)
        self.assertEqual(board.get_house_id(1), house_second)
def test_get_best_moves(self):
    board = KalahBoard(4, 4)
    position = [4, 0, 5, 5, 1, 5, 4, 4, 4, 0]

    def expect_all_moves_best(moves):
        # In these positions every allowed move is also a best move.
        self.assertEqual(board.allowed_moves(), moves)
        self.assertEqual(utils.get_best_moves(board), moves)

    self.assertEqual(board.get_board(), [4, 4, 4, 4, 0, 4, 4, 4, 4, 0])
    expect_all_moves_best([0, 1, 2, 3])

    # The same position is loaded twice for player 0, as before.
    for _ in range(2):
        board.set_board([4, 0, 5, 5, 1, 5, 4, 4, 4, 0])
        expect_all_moves_best([0, 2, 3])

    board.set_current_player(1)
    board.set_board(position)
    expect_all_moves_best([5, 6, 7, 8])
def get_next_move(self, board: KalahBoard):
    """Return one of the board's allowed moves, drawn with ``self.random``."""
    legal_moves = board.allowed_moves()
    return self.random.choice(legal_moves)
def test_empty_pit_capture_4_4(self):
    def expect_state(board, cells, player, score, moves):
        self.assertEqual(board.get_board(), cells)
        self.assertEqual(board.current_player(), player)
        self.assertEqual(board.game_over(), False)
        self.assertEqual(board.score(), score)
        self.assertEqual(board.allowed_moves(), moves)

    # Test for player 1
    board = KalahBoard(4, 4)
    board.set_current_player(0)
    board.set_board([1, 0, 4, 4, 7, 4, 4, 4, 4, 0])
    expect_state(board, [1, 0, 4, 4, 7, 4, 4, 4, 4, 0],
                 0, [7, 0], [0, 2, 3])
    self.assertEqual(board.move(0), True)
    expect_state(board, [0, 0, 4, 4, 12, 4, 0, 4, 4, 0],
                 1, [12, 0], [5, 7, 8])

    # Test for player 2
    board = KalahBoard(4, 4)
    board.set_current_player(1)
    board.set_board([4, 0, 5, 5, 1, 5, 4, 4, 4, 0])
    expect_state(board, [4, 0, 5, 5, 1, 5, 4, 4, 4, 0],
                 1, [1, 0], [5, 6, 7, 8])
    self.assertEqual(board.move(7), True)
    expect_state(board, [5, 1, 5, 5, 1, 5, 4, 0, 5, 1],
                 0, [1, 1], [0, 1, 2, 3])
def test_end_game_collect_all_remaining_seeds_third_test_2_2(self):
    board = KalahBoard(2, 2)
    board.set_board([0, 3, 1, 2, 2, 0])

    def expect_state(cells, player, finished, score, moves):
        self.assertEqual(board.get_board(), cells)
        self.assertEqual(board.current_player(), player)
        self.assertEqual(board.game_over(), finished)
        self.assertEqual(board.score(), score)
        self.assertEqual(board.allowed_moves(), moves)

    expect_state([0, 3, 1, 2, 2, 0], 0, False, [1, 0], [1])

    # Player 0's only move ends the game with all seeds in the houses.
    self.assertEqual(board.move(1), True)
    expect_state([0, 0, 2, 0, 0, 6], 1, True, [2, 6], [])
def test_end_game_collect_all_remaining_seeds_second_test_6_4(self):
    start = [0, 0, 0, 1, 1, 0, 24, 0, 0, 0, 0, 0, 1, 21]
    board = KalahBoard(6, 4)
    board.set_board([0, 0, 0, 1, 1, 0, 24, 0, 0, 0, 0, 0, 1, 21])

    def expect_state(cells, player, finished, score, moves):
        self.assertEqual(board.get_board(), cells)
        self.assertEqual(board.current_player(), player)
        self.assertEqual(board.game_over(), finished)
        self.assertEqual(board.score(), score)
        self.assertEqual(board.allowed_moves(), moves)

    expect_state(start, 0, False, [24, 21], [3, 4])

    # Each step: (move, board afterwards, player, game over, score, moves)
    steps = [
        (4, [0, 0, 0, 1, 0, 1, 24, 0, 0, 0, 0, 0, 1, 21],
         1, False, [24, 21], [12]),
        (12, [0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, 22],
         1, True, [26, 22], []),
    ]
    for move, cells, player, finished, score, moves in steps:
        self.assertEqual(board.move(move), True)
        expect_state(cells, player, finished, score, moves)
def test_end_game_collect_all_remaining_seeds_6_4(self):
    start = [0, 0, 1, 1, 0, 1, 30, 0, 0, 0, 0, 1, 0, 14]
    board = KalahBoard(6, 4)
    board.set_board([0, 0, 1, 1, 0, 1, 30, 0, 0, 0, 0, 1, 0, 14])

    def expect_state(cells, player, finished, score, moves):
        self.assertEqual(board.get_board(), cells)
        self.assertEqual(board.current_player(), player)
        self.assertEqual(board.game_over(), finished)
        self.assertEqual(board.score(), score)
        self.assertEqual(board.allowed_moves(), moves)

    expect_state(start, 0, False, [30, 14], [2, 3, 5])

    # Each step: (move, board afterwards, player, game over, score, moves)
    steps = [
        (2, [0, 0, 0, 2, 0, 1, 30, 0, 0, 0, 0, 1, 0, 14],
         1, False, [30, 14], [11]),
        (11, [0, 0, 0, 2, 0, 1, 30, 0, 0, 0, 0, 0, 1, 14],
         0, False, [30, 14], [3, 5]),
        (3, [0, 0, 0, 0, 1, 2, 30, 0, 0, 0, 0, 0, 1, 14],
         1, False, [30, 14], [12]),
        (12, [0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 15],
         1, True, [33, 15], []),
    ]
    for move, cells, player, finished, score, moves in steps:
        self.assertEqual(board.move(move), True)
        expect_state(cells, player, finished, score, moves)
def test_moves_6_4(self):
    def expect_state(board, cells, player, score, moves):
        # Neither scenario reaches the end of the game.
        self.assertEqual(board.get_board(), cells)
        self.assertEqual(board.current_player(), player)
        self.assertEqual(board.game_over(), False)
        self.assertEqual(board.score(), score)
        self.assertEqual(board.allowed_moves(), moves)

    # Scenario 1: player 1 has a single bin left to play.
    board = KalahBoard(6, 4)
    board.set_board([0, 0, 0, 0, 0, 1, 24, 0, 0, 0, 2, 0, 0, 21])
    board.set_current_player(1)
    expect_state(board, [0, 0, 0, 0, 0, 1, 24, 0, 0, 0, 2, 0, 0, 21],
                 1, [24, 21], [10])
    self.assertEqual(board.move(10), True)
    expect_state(board, [0, 0, 0, 0, 0, 1, 24, 0, 0, 0, 0, 1, 1, 21],
                 0, [24, 21], [5])

    # Scenario 2: player 1's move ends in the own house, so player 1
    # stays on turn.
    initial_board = [4, 4, 4, 4, 4, 0, 1, 5, 5, 5, 4, 4, 4, 0]
    board = KalahBoard(6, 4)
    board.set_board(initial_board)
    board.set_current_player(1)
    expect_state(board, initial_board, 1, [1, 0], [7, 8, 9, 10, 11, 12])
    self.assertEqual(board.move(8), True)
    expect_state(board, [4, 4, 4, 4, 4, 0, 1, 5, 0, 6, 5, 5, 5, 1],
                 1, [1, 1], [7, 9, 10, 11, 12])
def _get_score_gain(board: KalahBoard, player, mv):
    """Return how much ``player``'s score changes if ``mv`` is played.

    The move is applied to a copy obtained from ``board.copy()``; the
    board passed in is not moved on directly.
    """
    sandbox = board.copy()
    score_before = sandbox.score()[player]
    sandbox.move(mv)
    score_after = sandbox.score()[player]
    return score_after - score_before
shutil.rmtree(results_path) from torch.utils.tensorboard import SummaryWriter writer = SummaryWriter(results_path) torch.manual_seed(args.seed) random.seed(args.seed) np.random.seed(args.seed) SavedAction = namedtuple('SavedAction', ['log_prob', 'value']) # agent to validate against for determining the win rate opponent_agent_class = MaxScoreRepeatAgent env = KalahEnv() env.set_board(KalahBoard(args.bins, args.seeds)) env.set_agent_factory(KalahAgentFactory(seed=args.seed)) env.seed(args.seed) battleground = KalahBattleground(args.bins, args.seeds) c_count = 0 def run_compare(model): n_games = args.evaluation_games global c_count c_count += 1 results = battleground.battle(lambda seed: ActorCriticAgent(model, seed),
def test_empty_pit_opposite_no_empty_capture_4_4(self):
    # We do not have the "empty capture" rule
    board = KalahBoard(4, 4)
    board.set_board([1, 0, 4, 4, 7, 4, 0, 4, 4, 4])

    def expect_state(cells, player, score, moves):
        self.assertEqual(board.get_board(), cells)
        self.assertEqual(board.current_player(), player)
        self.assertEqual(board.game_over(), False)
        self.assertEqual(board.score(), score)
        self.assertEqual(board.allowed_moves(), moves)

    expect_state([1, 0, 4, 4, 7, 4, 0, 4, 4, 4], 0, [7, 4], [0, 2, 3])

    # The seed lands in an empty bin whose opposite bin is empty as well:
    # nothing is captured and the seed stays where it landed.
    self.assertEqual(board.move(0), True)
    expect_state([0, 1, 4, 4, 7, 4, 0, 4, 4, 4], 1, [7, 4], [5, 7, 8])
def battle(self, lambda_agent1, lambda_agent2, n_games=1000, seed=543):
    """
    Start a battle of n_games

    lambda_agent1 and lambda_agent2 are lambda functions with seed as argument.
    Each is called once per game to build a fresh agent; the agents of
    game k (counting from 0) receive seed + 2*k and seed + 2*k + 1.
    The agent built by lambda_agent1 always moves as player 0, the one
    built by lambda_agent2 as player 1. Both must provide
    get_next_move(board).

    A game is aborted once the repeat-invalid counter of one player
    exceeds 10. The counter starts at zero on a player's first invalid
    move and is not reset by valid moves. An aborted game counts as a
    win for the OTHER player: the player who kept submitting invalid
    moves forfeits.

    Returns KalahBattleGroundResults(n_games, wins_agent1, wins_agent2, draws).
    """
    max_repeated_invalid = 10

    wins_agent1 = 0
    wins_agent2 = 0
    draws = 0

    current_game = 0
    cnt = 0  # running offset so every agent instance gets its own seed
    while current_game < n_games:
        board = KalahBoard(self._bins, self._seeds)

        agent1 = lambda_agent1(seed + cnt)
        cnt += 1
        agent2 = lambda_agent2(seed + cnt)
        cnt += 1

        last_invalid_player = None
        invalid_count = 0
        while not board.game_over():
            if board.current_player() == 0:
                valid = board.move(agent1.get_next_move(board))
            else:
                valid = board.move(agent2.get_next_move(board))

            if not valid:
                if last_invalid_player == board.current_player():
                    invalid_count += 1
                else:
                    # A different player misbehaved: restart the count.
                    invalid_count = 0
                    last_invalid_player = board.current_player()
            if invalid_count > max_repeated_invalid:
                break

        if invalid_count > max_repeated_invalid:
            # Forfeit. The offending player must not be rewarded, so the
            # win goes to the opponent (previously it went to the offender).
            if last_invalid_player == 0:
                wins_agent2 += 1
            else:
                wins_agent1 += 1
        else:
            final_score = board.score()
            if final_score[0] > final_score[1]:
                wins_agent1 += 1
            elif final_score[0] < final_score[1]:
                wins_agent2 += 1
            else:
                draws += 1

        current_game += 1

    return KalahBattleGroundResults(n_games, wins_agent1, wins_agent2, draws)
def test_first_last_bin_functions(self):
    # Each row: (bins, seeds, (first, last) bin of player 0,
    #            (first, last) bin of player 1)
    cases = [
        (4, 4, (0, 3), (5, 8)),
        (4, 6, (0, 3), (5, 8)),
        (2, 4, (0, 1), (3, 4)),
        (6, 4, (0, 5), (7, 12)),
    ]
    for bins, seeds, range_first, range_second in cases:
        board = KalahBoard(bins, seeds)
        for player, (first_bin, last_bin) in enumerate(
                (range_first, range_second)):
            self.assertEqual(board._get_first_bin(player), first_bin)
            self.assertEqual(board._get_last_bin(player), last_bin)