def _traverse_from(self, grid: Grid, current_color: Color) -> bool:
    """Walk down the search tree from ``grid`` and return the playout result.

    The node ``(grid.state, current_color)`` is appended to
    ``self._nodes_for_backprop`` on every call, so after the traversal that
    list holds the whole path that was visited.

    Behaviour depends on the node stored in ``self._tree``:

    * not a leaf: follow the child chosen by ``_select_best_child_ucb``;
    * leaf with no available moves: return ``False``;
    * leaf that has never been visited: return the result of a rollout;
    * visited leaf: mark it as an inner node, register every child position
      that is not yet in the tree as a fresh leaf, then continue through a
      randomly chosen child.

    :param grid: position to traverse from.
    :param current_color: side to move in ``grid``.
    :return: the boolean produced at the end of the path (``False`` for a
        position without moves, otherwise whatever ``_rollout_from`` yields).
    """
    node_key = (grid.state, current_color)
    self._nodes_for_backprop.append(node_key)
    node_info: NodeInfo = self._tree[node_key]
    next_color = Color(1 - current_color)

    if not node_info.is_leaf:
        move = self._select_best_child_ucb(grid, current_color)
        return self._traverse_from(
            grid.grid_after_move(current_color, move), next_color)

    if len(grid.available_moves) == 0:
        return False

    if node_info.visits == 0:
        return self._rollout_from(grid, current_color)

    # NOTE(review): nothing here checks whether the position is already
    # decided, so a finished game with free cells appears to get expanded on
    # its second visit -- confirm this is intended.
    self._tree[node_key] = NodeInfo(
        wins=node_info.wins, visits=node_info.visits, is_leaf=False)
    for move in grid.available_moves:
        # Use the same key form for the membership test and the insertion;
        # the original tested with Color(...) but stored a bare ``1 - color``.
        child_key = (grid.grid_after_move(current_color, move).state, next_color)
        if child_key not in self._tree:
            self._tree[child_key] = NodeInfo(wins=0, visits=0, is_leaf=True)

    move = random.choice(grid.available_moves)
    return self._traverse_from(
        grid.grid_after_move(current_color, move), next_color)
def _minmax(self, grid: Grid, depth: int, alpha: int, beta: int, color: Color,
            last_move: Union[int, None]) -> Tuple[int, int]:
    """Returns (best available value, move towards best value)

    Depth-limited minimax with alpha-beta pruning and a transposition table
    keyed by ``grid.state``.

    :param grid: position to search.
    :param depth: remaining search depth; ``0`` evaluates ``grid`` statically.
    :param alpha: best value the maximising side is already assured of.
    :param beta: best value the minimising side is already assured of.
    :param color: side to move; ``MAX_COLOR`` maximises, ``MIN_COLOR`` minimises.
    :param last_move: column of the move that produced ``grid``, or ``None``
        when there is no such move (the game-over test is skipped then).
    :return: ``(value, move)``. For statically evaluated positions the move
        is the placeholder ``0``.
    """
    computed = self._transposition_table.get(grid.state, None)
    if computed is not None and computed[0] >= depth:
        return computed[1:]

    # A position without legal moves is terminal as well: without this check
    # the loop below would not run and the untouched +/-INF would be returned
    # together with ``None`` as the move.
    if self._deadline.is_set() or depth == 0 or \
            (last_move is not None
             and self._judge.is_over_after_move_in_col(grid.state, last_move)) or \
            len(grid.available_moves) == 0:
        return self._evaluate(grid.state, self._judge), 0

    value = -INF if color == MAX_COLOR else INF
    best_move = None
    next_color = Color(1 - color)
    for move in grid.available_moves:
        child_value, _ = self._minmax(grid.grid_after_move(color, move),
                                      depth - 1, alpha, beta, next_color, move)
        if color == MAX_COLOR and child_value > value:
            best_move = move
            value = child_value
            alpha = max(alpha, value)
        elif color == MIN_COLOR and child_value < value:
            best_move = move
            value = child_value
            beta = min(beta, value)
        elif best_move is None:
            # No child has beaten the initial bound yet (e.g. every line
            # scores exactly +/-INF). Still hand back a legal move instead
            # of ``None``; the value itself is left unchanged.
            best_move = move
        if alpha >= beta:
            break

    # NOTE(review): after a cutoff ``value`` is only a bound, yet it is stored
    # and later returned like an exact score; results computed while the
    # deadline is set are cached too -- confirm both are acceptable.
    self._transposition_table[grid.state] = (depth, value, best_move)
    return value, best_move
def _rollout_from(self, grid: Grid, color: Color, last_col: Union[int, None] = None) -> bool:
    """Play uniformly random moves from ``grid`` until the playout stops.

    The playout stops when the position has no available moves or the judge
    reports the game as over. The judge is asked via ``is_over`` while no
    previous column is known, and via ``is_over_after_move_in_col`` afterwards.

    :param grid: starting position.
    :param color: side to move in ``grid``.
    :param last_col: column of the move that led to ``grid``, if known.
    :return: ``True`` when the side to move in the final position differs
        from ``self._color``.
    """
    board, mover, previous_col = grid, color, last_col
    while True:
        finished = len(board.available_moves) == 0
        if not finished:
            if previous_col is None:
                finished = self._judge.is_over(board.state)
            else:
                finished = self._judge.is_over_after_move_in_col(board.state, previous_col)
        if finished:
            return mover != self._color

        # Advance the playout by one random move and hand the turn over.
        previous_col = random.choice(board.available_moves)
        board = board.grid_after_move(mover, previous_col)
        mover = Color(1 - mover)
def _select_best_child(self, parent: Grid, map_node_info: callable, current_color: Color) -> int:
    """Pick the move whose child node gets the highest score.

    ``map_node_info`` is called as ``map_node_info(parent_info, child_info)``
    for every available move and is expected to return a positive float.
    The first move with the strictly greatest score wins; scores that do not
    exceed ``-1`` are never selected.

    :param parent: position whose children are compared.
    :param map_node_info: scoring function for a (parent, child) info pair.
    :param current_color: side to move in ``parent``.
    :return: the selected move.
    """
    assert len(parent.available_moves) > 0, 'A leaf actually'
    parent_stats = self._tree[(parent.state, current_color)]
    opponent = Color(1 - current_color)

    chosen, top_score = None, -1
    for candidate in parent.available_moves:
        child_state = parent.grid_after_move(current_color, candidate).state
        score = map_node_info(parent_stats, self._tree[(child_state, opponent)])
        if score > top_score:
            chosen, top_score = candidate, score
    return chosen
def _finishing_move_in(self, grid: Grid) -> Union[int, None]:
    """Find a move by ``self._color`` after which the judge reports game over.

    Moves are tried in the order given by ``grid.available_moves``.

    :param grid: position to examine.
    :return: the first such move, or ``None`` when there is none.
    """
    def ends_game(column):
        # Ask the judge about the position reached by playing ``column``.
        resulting = grid.grid_after_move(self._color, column)
        return self._judge.is_over_after_move_in_col(resulting.state, column)

    return next(
        (column for column in grid.available_moves if ends_game(column)),
        None,
    )