def test_optimized_combined_heuristic(self):
    """
    The optimized, stateful heuristic must report the same values as the
    non-optimized one after moves, undos and a turn rotation.
    """
    reference = CombinedHeuristic()
    optimized = OptimizedCombinedHeuristic()
    # the optimized heuristic is handed to the game as a visitor
    board_game = CCGame(width=5, visitors=[optimized])

    def assert_same_values():
        # compare both heuristics for each player, to 2 decimal places
        for side in (1, 2):
            expected = reference.value(board_game, side)
            actual = optimized.value(board_game, side)
            self.assertAlmostEqual(expected, actual, places=2)

    board_game.move(2, 1, CCMovement.LS)
    assert_same_values()

    board_game.move(7, 1, CCMovement.RN)
    assert_same_values()

    board_game.undo_last_move()
    board_game.rotate_turn()
    assert_same_values()

    board_game.undo_last_move()
    assert_same_values()
def test_player_2_wins_in_one(self):
    """
    From the PLAYER_2_WINS_IN_ONE board, the min-max search for player 2
    must return the winning score and its move must end the game.
    """
    board_game = CCGame(width=5, player_row_span=3)
    board_game.board = TEST_BOARD_STRATEGY_PLAYER_2_WINS_IN_ONE
    minmax = MinMaxStrategy(steps=0)

    # hand the turn over to player 2 before searching
    board_game.rotate_turn()
    result = minmax._select_move(board_game, 2, 0, -100000, 100000)
    chosen_move = result[0]
    value = result[1]
    board_game.apply_move_sequence(chosen_move)

    self.assertEqual(100000, value)
    self.assertEqual(2, board_game.state())
def test_player_1_wins(self):
    """
    From the PLAYER_1_WINS_IN_TWO board, min-max without alpha-beta
    pruning must lead player 1 to a win in two of its own moves.
    """
    game = CCGame(width=5, player_row_span=3)
    game.board = TEST_BOARD_STRATEGY_PLAYER_1_WINS_IN_TWO
    strategy = MinMaxStrategy(alpha_beta_pruning=False)

    # first move: not a win yet, but the score must already be high
    move, score = strategy._select_move(game, 1, 0, -100000, 100000)
    # assertGreater reports the offending score when the check fails
    self.assertGreater(score, 1000)
    game.apply_move_sequence(move)

    # give the turn back to player 1 for its second move
    game.rotate_turn()
    move, score = strategy._select_move(game, 1, 0, -100000, 100000)
    self.assertEqual(100000, score)
    game.apply_move_sequence(move)

    self.assertEqual(1, game.state())
def test_available_moves_depth_2(self):
    """
    After player 1 plays (2, 0) RS and the turn is rotated, the moves
    available to player 1 must include these two-step sequences.
    """
    game = CCGame(width=5)
    game.move(2, 0, CCMovement.RS)
    game.rotate_turn()

    moves = CCReasoner.available_moves(game, 1)

    # assertIn shows the missing move and the container on failure,
    # where assertTrue(x in moves) only reported "False is not true"
    self.assertIn(
        CCMove([(0, 0), (2, 0), (4, 2)], [CCMovement.LS, CCMovement.RS]),
        moves)
    self.assertIn(
        CCMove([(1, 0), (3, 2), (3, 0)], [CCMovement.RS, CCMovement.L]),
        moves)
    self.assertIn(
        CCMove([(2, 2), (2, 0), (4, 2)], [CCMovement.L, CCMovement.RS]),
        moves)
def test_player_1_wins(self):
    """
    From the PLAYER_1_WINS_IN_TWO board, the only-max strategy must score
    50000 on the first move, 100000 on the second, and end with player 1
    as the winner.
    """
    board_game = CCGame(width=5, player_row_span=3)
    board_game.board = TEST_BOARD_STRATEGY_PLAYER_1_WINS_IN_TWO
    only_max = OnlyMaxStrategy(steps=1,
                               player=1,
                               heuristic=CombinedVerticalAdvance())

    first = only_max._select_move(board_game, 0)
    self.assertEqual(50000, first[1])
    board_game.apply_move_sequence(first[0])

    # return the turn to player 1 before searching again
    board_game.rotate_turn()
    second = only_max._select_move(board_game, 0)
    self.assertEqual(100000, second[1])
    board_game.apply_move_sequence(second[0])

    self.assertEqual(1, board_game.state())
def test_available_moves_player_2(self):
    """
    On a fresh width-5 board both players must have the same number of
    available moves.
    """
    board_game = CCGame(width=5)

    for_player_1 = CCReasoner.available_moves(board_game, 1)
    # player 2's moves are collected once it is player 2's turn
    board_game.rotate_turn()
    for_player_2 = CCReasoner.available_moves(board_game, 2)

    count_1 = len(for_player_1)
    count_2 = len(for_player_2)
    self.assertEqual(count_1, count_2)
def _select_move(self, game: CCGame, depth: int) -> Tuple[CCMove, float]:
    """
    Search the moves of ``self.player`` only, ignoring the opponent's
    replies, and pick the one with the highest score.

    Args:
        game: game to search; moves are applied to it and undone again
            before returning.
        depth: current recursion level (the top-level call passes 0).
            Heuristic evaluation happens when ``depth == self.steps``.

    Returns:
        tuple
        - position 0: best move that can be done by the player at this
                      level.
        - position 1: best heuristic value that can be achieved at this
                      level if following best move.

    Raises:
        AssertionError: if it is not ``self.player``'s turn, or if no
            move is available.
    """
    # One flag drives both the table lookup and the table store. The
    # store used to be guarded by `self.hasher` alone, which could
    # reference `position_hash` before it had been assigned.
    use_table = bool(self.use_transposition_table and self.hasher)
    if use_table:
        position_hash = self.hasher.get_hash(game)
        cached_move, cached_score, cached_depth = (
            self.transposition_table.get(position_hash,
                                         (None, -100000.0, -1)))
        if cached_move and cached_depth == depth:
            return (cached_move, cached_score)

    moves = self.available_moves(game, self.player)

    if game.player_turn != self.player:
        raise AssertionError(""" Player turn hasn't been rotated properly - this is likely a software bug """)

    # Always start the search from scratch: an entry cached at another
    # depth must not seed the result, because its score was computed at
    # a different search depth and is not comparable to the scores
    # computed below.
    best_move, best_score = (None, -100000.0)

    for move in moves:
        if not best_move:
            best_move = move

        game.apply_move_sequence(move)
        # doesn't matter what the other does
        game.rotate_turn()

        # check if game has already ended
        state = game.state()
        if state == 1:
            # player 1 wins
            # prefer winning in as few steps as possible
            curr_score = (100000 / (depth + 1)
                          if self.player == 1 else -100000)
        elif state == 2:
            # player 2 wins
            # prefer winning in as few steps as possible
            curr_score = (-100000
                          if self.player == 1 else 100000 / (depth + 1))
        elif depth == self.steps:
            # search limit reached: evaluate the position
            curr_score = self.heuristic.value(game, self.player)
        else:
            curr_score = self._select_move(game, depth + 1)[1]

        # keep the best move that can be done at this level
        if curr_score > best_score:
            best_score = curr_score
            best_move = move

        # undo movement, one undo per step of the sequence
        for _ in range(len(move.directions)):
            game.undo_last_move()

    if not best_move:
        raise AssertionError(""" No possible movements available, this must be a software bug """)

    if use_table:
        # save into transposition table
        self.transposition_table[position_hash] = (best_move, best_score,
                                                   depth)
    return (best_move, best_score)
def _select_move(self, game: CCGame, player: int, depth: int,
                 alpha: float, beta: float) -> Tuple[CCMove, float]:
    """
    Min-max search step for `player` at recursion level `depth`.

    Even depths are maximizing levels and odd depths minimizing ones.
    When `depth` reaches `self.steps * 2` the position is scored as the
    heuristic value of `player` minus that of the other player;
    otherwise the search recurses for the other player.

    Returns:
        tuple
        - position 0: best move that can be done by the player at this
                      level.
        - position 1: best score that can be achieved at this level if
                      following best move.

    Raises:
        AssertionError: if it is not `player`'s turn, or if no move is
            left to explore.
    """
    if self.hasher:
        # transposition table business logic: one table per player
        position_hash = self.hasher.get_hash(game)
        if player == 1:
            tt = self.transposition_table_1
        else:
            tt = self.transposition_table_2
        cached_move, cached_score, cached_depth = tt.get(
            position_hash, (None, -100000.0, -1))
        if cached_move and cached_depth == depth:
            return (cached_move, cached_score)

    moves = self.available_moves(game, player)

    if game.player_turn != player:
        raise AssertionError(""" Player turn hasn't been rotated properly - this is likely a software bug """)

    opponent = 2 if player == 1 else 1

    moves_queue = PriorityQueue()  # type: ignore
    for candidate in moves:
        priority = 1
        positions = candidate.board_positions
        if self.pre_sort_moves:
            first_row = positions[0][0]
            last_row = positions[-1][0]
            if player == 1:
                advance = last_row - first_row
            else:
                advance = first_row - last_row

            if self.extra_prunning and advance <= 0 and depth >= 3:
                # prune movements down the tree which don't bring any
                # extra advance
                continue

            # otherwise sort movements by vertical advance to maximize
            # alpha-beta pruning
            priority = -advance
        moves_queue.put(PrioritizedCCMove(priority, candidate))

    maximizing = depth % 2 == 0
    best_move = None
    if maximizing:
        best_score = -100000.0
    else:
        best_score = 100000.0

    while not moves_queue.empty():
        move = moves_queue.get().move
        if not best_move:
            best_move = move

        game.apply_move_sequence(move)

        # check if game has already ended
        state = game.state()
        if state == 1:
            # player 1 wins
            # prefer winning in as few steps as possible
            if player == 1:
                curr_score = 100000 / (depth + 1)
            else:
                curr_score = -100000
            if not maximizing:
                curr_score = -curr_score
        elif state == 2:
            # player 2 wins
            # prefer winning in as few steps as possible
            if player == 1:
                curr_score = -100000
            else:
                curr_score = 100000 / (depth + 1)
            if not maximizing:
                curr_score = -curr_score
        elif depth == self.steps * 2:
            # approximate the score of the game by
            # subtracting heuristics
            curr_score = (self.heuristic.value(game, player) -
                          self.heuristic.value(game, opponent))
        else:
            curr_score = self._select_move(game, opponent, depth + 1,
                                           alpha, beta)[1]

        # keep the best move that can be done at this level
        if maximizing:
            improved = curr_score > best_score
        else:
            improved = curr_score < best_score
        if improved:
            best_score = curr_score
            best_move = move

        # undo movement: restore the turn first, then one undo per step
        if game.player_turn != player:
            game.rotate_turn()
        for _ in range(len(move.directions)):
            game.undo_last_move()

        # perform alpha-beta pruning
        if self.alpha_beta_pruning:
            if maximizing:
                alpha = max(alpha, best_score)
            else:
                beta = min(beta, best_score)
            if beta <= alpha:
                # alpha/beta pruning
                break

    if not best_move:
        raise AssertionError(""" No possible movements available, this must be a software bug """)

    if self.hasher:
        # save into transposition table
        tt[position_hash] = (best_move, best_score, depth)
    return (best_move, best_score)
def play(board_size: int, player_row_span: int):
    """
    Run an interactive game: player 1 is a MinMaxStrategy AI, player 2
    moves manually through the Pygame GUI.

    Prints per-turn information, timing statistics for each AI move and,
    once the game is over, the winner and the total elapsed time.

    Args:
        board_size: passed to CCGame as `width`.
        player_row_span: passed to CCGame as `player_row_span`.
    """
    random.seed(1)

    # (these weights were found running different experiments with the
    # weight_search.py script)
    oc_heuristic = OptimizedCombinedHeuristic(weights=[0.01, 0.44, 0.55])

    game = CCGame(width=board_size,
                  player_row_span=player_row_span,
                  visitors=[oc_heuristic])

    manual_players = {2}
    ai_players = {
        1:
        MinMaxStrategy(steps=1,
                       pre_sort_moves=True,
                       transposition_table=True,
                       heuristic=oc_heuristic)
    }

    # Kept separate from the per-move timer below: reusing one `start`
    # variable made the final report show the duration of the last AI
    # move instead of the whole game.
    game_start = time.time()
    player_turn = 1
    turns = 0

    gui = PygameGUI(game)
    manual_player = ManualPlayer(gui)

    # seconds spent selecting each move, per AI player
    ai_players_perf: Dict[int, List[float]] = {1: [], 2: []}

    while game.state() == 0:
        print(f'Turn: {player_turn}')
        assert game.player_turn == player_turn

        gui.update()

        if player_turn not in manual_players:
            strategy = ai_players[player_turn]

            move_start = time.time()
            move = strategy.select_move(game, player_turn)
            move_end = time.time()
            ai_players_perf[player_turn].append(move_end - move_start)

            print(f'Move sequence: {move}')
            print(f'Turn {turns}')
            print(('Performance: '
                   f'{stats.describe(ai_players_perf[player_turn])}'))
            print(f'Heuristic values: {oc_heuristic.value(game, 1)} - '
                  f'{oc_heuristic.value(game, 2)}')

            game.apply_move_sequence(move)
        else:
            print("It's manual's player turn!")
            manual_player.move(game, player_turn)

        # rotate only if the move above left the turn with this player
        if game.player_turn == player_turn:
            game.rotate_turn()
        player_turn = game.player_turn
        turns += 1

        print('..........................')

    print(f'PLAYER {game.state()} WINS after {turns} turns')
    print(time.time() - game_start)