def _traverse_from(self, grid: Grid, current_color: Color) -> bool:
    """Descend the search tree from ``grid`` and return the playout outcome.

    Every visited ``(state, color)`` key is appended to
    ``self._nodes_for_backprop`` so the caller can update statistics later.

    :param grid: position the descent continues from
    :param current_color: color that is to move in ``grid``
    :return: ``False`` for a leaf with no available moves, otherwise the
        result of the rollout or of the deeper traversal
    """
    node_key = (grid.state, current_color)
    self._nodes_for_backprop.append(node_key)
    node_info: NodeInfo = self._tree[node_key]

    if not node_info.is_leaf:
        # Inner node: follow the child preferred by the UCB selection.
        move = self._select_best_child_ucb(grid, current_color)
        return self._traverse_from(
            grid.grid_after_move(current_color, move), Color(1 - current_color))
    if len(grid.available_moves) == 0:
        return False
    if node_info.visits == 0:
        # Unvisited leaf: estimate it with a random playout.
        return self._rollout_from(grid, current_color)

    # Visited leaf: expand it, i.e. mark it as inner node and register
    # every child that is not in the tree yet.
    opponent = Color(1 - current_color)
    self._tree[node_key] = NodeInfo(
        wins=node_info.wins, visits=node_info.visits, is_leaf=False)
    for move in grid.available_moves:
        # Always key children by a Color (never a bare int) so stored keys
        # have the same type as the keys used for lookups.
        child_key = (grid.grid_after_move(current_color, move).state, opponent)
        if child_key not in self._tree:
            self._tree[child_key] = NodeInfo(wins=0, visits=0, is_leaf=True)

    move = random.choice(grid.available_moves)
    return self._traverse_from(grid.grid_after_move(current_color, move), opponent)
def test_move(self):
    """After one RED move in column 0, cell 0 holds RED and cell 1 is empty."""
    grid = Grid()
    grid.move(Color.RED, 0)
    columns = grid.state.cols_first
    first_column = columns[0]
    self.assertEqual(first_column[0], Color.RED)
    self.assertIsNone(first_column[1])
def _minmax(self, grid: Grid, depth: int, alpha: int, beta: int, color: Color,
            last_move: Union[int, None]) -> Tuple[int, int]:
    """Returns (best available value, move towards best value)

    Alpha-beta search backed by ``self._transposition_table``, which maps a
    state to ``(depth, value, best_move)``. A stored entry is reused when it
    was computed with at least the requested depth.

    :param grid: position to search
    :param depth: remaining search depth; 0 evaluates ``grid`` directly
    :param alpha: best value found so far for the maximizing side
    :param beta: best value found so far for the minimizing side
    :param color: color to move in ``grid``
    :param last_move: column of the move that led to ``grid``, or ``None``
        when there is no such move (the game-over check is then skipped)
    :return: ``(value, move)``; ``move`` is 0 for positions that are
        evaluated rather than searched
    """
    computed = self._transposition_table.get(grid.state, None)
    if computed is not None and computed[0] >= depth:
        return computed[1:]

    # Stop searching when time is up, the depth budget is spent, the last
    # move finished the game, or there is nothing left to play. Without the
    # last check a position with no moves would skip the loop below and be
    # scored as +/-INF with no move.
    if self._deadline.is_set() or depth == 0 or \
            (last_move is not None and
             self._judge.is_over_after_move_in_col(grid.state, last_move)) or \
            len(grid.available_moves) == 0:
        return self._evaluate(grid.state, self._judge), 0

    value = -INF if color == MAX_COLOR else INF
    best_move = None
    for move in grid.available_moves:
        child_value, _ = self._minmax(grid.grid_after_move(color, move), depth - 1,
                                      alpha, beta, Color(1 - color), move)
        # ``best_move is None`` makes the first examined move the fallback, so
        # a legal move is returned even if no child beats the initial bound.
        if color == MAX_COLOR and (best_move is None or child_value > value):
            best_move = move
            value = child_value
            alpha = max(alpha, value)
        elif color == MIN_COLOR and (best_move is None or child_value < value):
            best_move = move
            value = child_value
            beta = min(beta, value)
        if alpha >= beta:
            # The opponent already has a better alternative; prune the rest.
            break

    self._transposition_table[grid.state] = (depth, value, best_move)
    return value, best_move
def create_game(first_player: Player, second_player: Player,
                first_color: Color = Color.RED) -> Game:
    """Build a ``Game`` on a fresh default ``Grid`` for the two players.

    ``judge`` is not a parameter; it is resolved from the surrounding scope.

    :param first_player: passed to ``Game`` as ``first_player``
    :param second_player: passed to ``Game`` as ``second_player``
    :param first_color: passed to ``Game`` as ``first_color`` (RED by default)
    """
    game_kwargs = dict(
        grid=Grid(),
        judge=judge,
        first_player=first_player,
        second_player=second_player,
        first_color=first_color,
    )
    return Game(**game_kwargs)
def _rollout_from(self, grid: Grid, color: Color,
                  last_col: Union[int, None] = None) -> bool:
    """Play uniformly random moves until the playout stops; report the result.

    The playout stops when no move is available or the judge reports the game
    as over. The full-board check ``is_over`` is used while no last column is
    known, the per-column check afterwards.

    :param grid: position the playout starts from
    :param color: color to move in ``grid``
    :param last_col: column of the move that produced ``grid``, if known
    :return: ``True`` when the color to move at the final position differs
        from ``self._color``
    """
    while len(grid.available_moves) != 0:
        if last_col is None:
            finished = self._judge.is_over(grid.state)
        else:
            finished = self._judge.is_over_after_move_in_col(grid.state, last_col)
        if finished:
            break
        # Advance the playout by one random move and hand over the turn.
        last_col = random.choice(grid.available_moves)
        grid = grid.grid_after_move(color, last_col)
        color = Color(1 - color)
    return color != self._color
def make_move_in_state(self, state: State) -> int:
    """Choose the move to play in ``state``.

    A move that finishes the game immediately is returned right away.
    Otherwise the root node is registered in the tree if missing, the search
    is run and the most visited child of the root is picked.

    :param state: position to move in; must offer at least one move
    :return: the chosen move
    """
    grid = Grid.from_state(state)
    assert len(grid.available_moves) > 0, 'No move available'

    winning_move = self._finishing_move_in(grid)
    if winning_move is None:
        root_key = (state, self._color)
        if root_key not in self._tree:
            self._tree[root_key] = NodeInfo(visits=0, is_leaf=True, wins=0)
        self._compute(grid)
        return self._pick_most_visited_child_of(grid)
    return winning_move
def test_middle_finishing(self):
    """Four RED discs in a row are detected when the last one fills the gap."""
    judge = Judge()
    grid = Grid(ncols=4, nrows=2)
    # Column 2 is played last, closing the gap between columns 1 and 3.
    for column in (0, 1, 3, 2):
        grid.move(Color.RED, column)
    self.assertTrue(judge.is_over(grid.state))
    self.assertTrue(judge.is_over_after_move_in_col(grid.state, 2))
def _select_best_child(self, parent: Grid, map_node_info: callable,
                       current_color: Color) -> int:
    """Return the move to the child of ``parent`` that scores highest.

    ``map_node_info`` receives the parent's node info and a child's node
    info and should return a positive float; the first move with the
    strictly highest score wins.

    :param parent: position whose children are compared
    :param map_node_info: scoring callback ``(parent_info, child_info) -> float``
    :param current_color: color to move in ``parent``
    """
    assert len(parent.available_moves) > 0, 'A leaf actually'
    parent_info = self._tree[(parent.state, current_color)]

    chosen_move = None
    top_score = -1
    for candidate in parent.available_moves:
        child_state = parent.grid_after_move(current_color, candidate).state
        child_info = self._tree[(child_state, Color(1 - current_color))]
        score = map_node_info(parent_info, child_info)
        if score > top_score:
            chosen_move = candidate
            top_score = score
    return chosen_move
def test_initial_state_rows_first(self):
    """The rows-first view of a 7x6 grid has 6 rows with 7 cells in the first."""
    rows = Grid(ncols=7, nrows=6).state.rows_first
    row_count = len(rows)
    self.assertEqual(row_count, 6)
    first_row_width = len(rows[0])
    self.assertEqual(first_row_width, 7)
def test_size(self):
    """A grid reports the column and row counts it was constructed with."""
    grid = Grid(ncols=12, nrows=2)
    actual_cols = grid.ncols
    self.assertEqual(actual_cols, 12)
    actual_rows = grid.nrows
    self.assertEqual(actual_rows, 2)
def test_vertical_win(self):
    """Four RED discs stacked in column 0 count as a vertical win only."""
    judge = Judge()
    grid = Grid(ncols=2, nrows=4)
    # Three rounds: RED stacks column 0 while BLACK stacks column 1.
    for _ in range(3):
        grid.move(Color.RED, 0)
        grid.move(Color.BLACK, 1)
    self.assertFalse(judge.is_over(grid.state))

    grid.move(Color.RED, 0)
    self.assertTrue(judge.is_over(grid.state))
    self.assertTrue(judge.is_over_vertical(grid.state))
    self.assertFalse(judge.is_over_horizontal(grid.state))
def _finishing_move_in(self, grid: Grid) -> Union[int, None]:
    """Return the first available move after which the judge reports game over.

    Each available move is tried for ``self._color``.

    :param grid: position to inspect
    :return: the first such move, or ``None`` when there is none
    """
    for candidate in grid.available_moves:
        resulting_state = grid.grid_after_move(self._color, candidate).state
        if self._judge.is_over_after_move_in_col(resulting_state, candidate):
            return candidate
    return None
def test_state(self):
    """The rows-first state reflects every move, with row 0 at the bottom."""
    grid = Grid(ncols=5, nrows=4)
    # RED and BLACK alternate, RED first.
    for turn, column in enumerate((0, 1, 1, 2, 4, 1)):
        mover = Color.RED if turn % 2 == 0 else Color.BLACK
        grid.move(mover, column)
    rows = grid.state.rows_first

    # 3 - - - - -
    # 2 - B - - -
    # 1 - R - - -
    # 0 R B B - R
    #
    #   0 1 2 3 4
    expected_by_row = (
        (3, [None, None, None, None, None]),
        (2, [None, Color.BLACK, None, None, None]),
        (1, [None, Color.RED, None, None, None]),
        (0, [Color.RED, Color.BLACK, Color.BLACK, None, Color.RED]),
    )
    for row_index, expected in expected_by_row:
        self.assertSequenceEqual(expected, rows[row_index])
def test_rightup_win(self):
    """Four BLACK discs on the rising diagonal count as a right-up win only."""
    judge = Judge()
    grid = Grid(ncols=5, nrows=4)
    # RED and BLACK alternate, RED first.
    for turn, column in enumerate((1, 0, 2, 1, 3, 2, 3, 2, 3)):
        mover = Color.RED if turn % 2 == 0 else Color.BLACK
        grid.move(mover, column)

    # Board once BLACK has also played column 3 (the last move below):
    # 3 - - - B -
    # 2 - - B R -
    # 1 - B B R -
    # 0 B R R R -
    #   0 1 2 3 4
    self.assertFalse(judge.is_over(grid.state))

    grid.move(Color.BLACK, 3)
    final_state = grid.state
    self.assertTrue(judge.is_over(final_state))
    self.assertTrue(judge.is_over_rightup(grid.state))
    self.assertFalse(judge.is_over_rightdown(grid.state))
def test_move_outside(self):
    """Moving in column 7 of a default grid raises AssertionError."""
    grid = Grid()
    self.assertRaises(AssertionError, grid.move, Color.RED, 7)
def make_move_in_state(self, state: State) -> int:
    """Choose a move for ``state`` within the configured time budget.

    The deadline flag is cleared, a timer is armed to set it after
    ``self._timeout`` and the iterative-deepening search is run.
    NOTE(review): ``Timer`` is presumably ``threading.Timer`` and
    ``self._deadline`` a ``threading.Event`` - confirm against the imports.

    :param state: position to move in
    :return: the move returned by ``self._iterative_deepening``
    """
    self._deadline.clear()
    timer = Timer(self._timeout, self._deadline.set)
    timer.start()
    try:
        return self._iterative_deepening(Grid.from_state(state))
    finally:
        # If the search finished early, the timer is still pending. Cancel it
        # so it cannot set the deadline during a later call and cut that
        # call's search short.
        timer.cancel()
(last_move is not None and self._judge.is_over_after_move_in_col(grid.state, last_move)): return self._evaluate(grid.state, self._judge), 0 value = -INF if color == MAX_COLOR else INF best_move = None for move in grid.available_moves: child_value, _ = self._minmax(grid.grid_after_move(color, move), depth - 1, alpha, beta, Color(1 - color), move) if color == MAX_COLOR and child_value > value: best_move = move value = child_value alpha = max(alpha, value) elif color == MIN_COLOR and child_value < value: best_move = move value = child_value beta = min(beta, value) if alpha >= beta: break self._transposition_table[grid.state] = (depth, value, best_move) return value, best_move if __name__ == '__main__': game = Game(Grid(), Judge(), MinmaxPlayer(Color.RED, Judge(), Evaluator(), 4, 30), MinmaxPlayer(Color.BLACK, Judge(), Evaluator(), 6, 18)) print(game.play())
def test_from_coords(self):
    """``is_over_from`` is true for (0, 3) and (3, 0), false for (1, 2) and (2, 1)."""
    judge = Judge()
    grid = Grid(ncols=5, nrows=4)
    # RED and BLACK alternate, RED first.
    for turn, column in enumerate((2, 3, 1, 2, 0, 1, 0, 1, 0, 0)):
        mover = Color.RED if turn % 2 == 0 else Color.BLACK
        grid.move(mover, column)

    # 3 B - - - -
    # 2 R B - - -
    # 1 R B B - -
    # 0 R R R B -
    #   0 1 2 3 4
    for coords in ((1, 2), (2, 1)):
        self.assertFalse(judge.is_over_from(grid.state, *coords))
    for coords in ((0, 3), (3, 0)):
        self.assertTrue(judge.is_over_from(grid.state, *coords))
def test_horizontal_win(self):
    """Four RED discs along the bottom row count as a horizontal win only."""
    judge = Judge()
    grid = Grid(ncols=4, nrows=4)
    # In columns 0..2 RED takes the bottom cell and BLACK the one above it.
    for column in range(3):
        grid.move(Color.RED, column)
        grid.move(Color.BLACK, column)
    self.assertFalse(judge.is_over(grid.state))

    grid.move(Color.RED, 3)
    self.assertTrue(judge.is_over(grid.state))
    self.assertTrue(judge.is_over_horizontal(grid.state))
    self.assertFalse(judge.is_over_rightdown(grid.state))
def test_rightdown_win(self):
    """Four BLACK discs on the falling diagonal count as a right-down win only."""
    judge = Judge()
    grid = Grid(ncols=5, nrows=4)
    # RED and BLACK alternate, RED first.
    for turn, column in enumerate((2, 3, 1, 2, 0, 1, 0, 1, 0)):
        mover = Color.RED if turn % 2 == 0 else Color.BLACK
        grid.move(mover, column)

    # Board once BLACK has also played column 0 (the last move below):
    # 3 B - - - -
    # 2 R B - - -
    # 1 R B B - -
    # 0 R R R B -
    #   0 1 2 3 4
    self.assertFalse(judge.is_over(grid.state))

    grid.move(Color.BLACK, 0)
    self.assertTrue(judge.is_over(grid.state))
    self.assertTrue(judge.is_over_rightdown(grid.state))
    self.assertFalse(judge.is_over_vertical(grid.state))
def test_overflow(self):
    """A seventh move into the same column of a default grid raises AssertionError."""
    grid = Grid()
    column = 2
    moves_made = 0
    while moves_made < 6:
        grid.move(Color.RED, column)
        moves_made += 1
    self.assertRaises(AssertionError, grid.move, Color.RED, column)
move = random.choice(grid.available_moves) return self._traverse_from( grid.grid_after_move(current_color, move), Color(1 - current_color)) def _rollout_from(self, grid: Grid, color: Color, last_col: Union[int, None] = None) -> bool: if len(grid.available_moves) == 0 or (last_col is None and self._judge.is_over(grid.state)) or \ (last_col is not None and self._judge.is_over_after_move_in_col(grid.state, last_col)): return color != self._color move = random.choice(grid.available_moves) has_won = self._rollout_from(grid.grid_after_move(color, move), Color(1 - color), move) return has_won def _finishing_move_in(self, grid: Grid) -> Union[int, None]: for move in grid.available_moves: after_move = grid.grid_after_move(self._color, move) if self._judge.is_over_after_move_in_col(after_move.state, move): return move if __name__ == '__main__': game = Game(Grid(), Judge(), MCTSPlayer(Color.RED, Judge(), 2, 1000), MCTSPlayer(Color.BLACK, Judge(), 5, 1000)) print(game.play())
def test_empty_grid(self):
    """Constructing a 7-column, 6-row grid must not raise."""
    dimensions = {'ncols': 7, 'nrows': 6}
    Grid(**dimensions)
def test_from_last_in_empty(self):
    """Asking for the result of a 'last move' in an empty grid raises AssertionError."""
    referee = Judge()
    empty_grid = Grid(ncols=2, nrows=2)
    column = 1
    with self.assertRaises(AssertionError):
        referee.is_over_after_move_in_col(empty_grid.state, column)