def get_candidate_set_size(
    x_eval: numpy.ndarray,
    lower_bound: numpy.ndarray,
    upper_bound: numpy.ndarray,
    range_index: BallTree,
) -> numpy.ndarray:
    """
    Computes the candidate set size with index support.

    For every evaluation point, the index is asked how many indexed points lie
    within the lower bound radius and how many lie within the upper bound
    radius. The difference of both counts is the number of points that can
    neither be safely included nor safely excluded, i.e. the candidates.

    :param x_eval: numpy.ndarray, shape: (n_eval, d), dtype: numpy.float
        The data points to evaluate.
    :param lower_bound: numpy.ndarray, shape: (n_eval,) or (n_eval, k), dtype: numpy.float
        The lower bounds (one per data point, or k per data point).
    :param upper_bound: numpy.ndarray, shape: (n_eval,) or (n_eval, k), dtype: numpy.float
        The upper bounds. In the two-dimensional case it is indexed column-wise
        exactly like lower_bound.
    :param range_index: BallTree
        The index. Only ``query_radius(X=..., r=..., count_only=True)`` is used.

    :return: numpy.ndarray, shape: (n_eval,) or (n_eval, k)
        The candidate set sizes per data point (and per bound column). The
        two-dimensional result is stored as numpy.int32; the one-dimensional
        result is the difference of the counts returned by the index.

    :raises ValueError:
        If lower_bound is neither one- nor two-dimensional.
    """
    if lower_bound.ndim == 2:
        n, k = lower_bound.shape
        candidate_set_size = numpy.empty(shape=(n, k), dtype=numpy.int32)
        # Handle each column of bounds as an independent one-dimensional problem.
        for i in range(k):
            candidate_set_size[:, i] = get_candidate_set_size(
                x_eval=x_eval,
                lower_bound=lower_bound[..., i],
                upper_bound=upper_bound[..., i],
                range_index=range_index,
            )
        return candidate_set_size

    if lower_bound.ndim == 1:
        # Distance within lower bound? -> true inclusion
        n_lower = range_index.query_radius(X=x_eval, r=lower_bound, count_only=True)
        # Distance within upper bound? -> everything that is not a true exclusion
        n_upper = range_index.query_radius(X=x_eval, r=upper_bound, count_only=True)
        # Distance between? -> candidate
        return n_upper - n_lower

    raise ValueError(
        f"lower_bound must be one- or two-dimensional, got ndim={lower_bound.ndim}"
    )
def test_ball_tree_query_radius(n_samples=100, n_features=10):
    """Check that BallTree.query_radius returns the same indices as a
    brute-force distance filter around the origin.

    A range of radii between the distances of the first and the last sample
    is tested; for each radius the sorted tree result must equal the sorted
    brute-force result.
    """
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    bt = BallTree(X, leaf_size=5)
    # brute-force distance of every sample to the query point
    rad = np.sqrt(((X - query_pt) ** 2).sum(1))

    for r in np.linspace(rad[0], rad[-1], 100):
        # query_radius is documented for 2D input (n_queries, n_features), so
        # the single query point is wrapped; [0] selects its result.
        ind = bt.query_radius([query_pt], r + eps)[0]
        i = np.where(rad <= r + eps)[0]

        ind.sort()
        i.sort()

        assert_array_almost_equal(i, ind)
def test_ball_tree_query_radius_distance(n_samples=100, n_features=10):
    """Check that the distances returned by BallTree.query_radius match
    distances recomputed directly from the returned indices.
    """
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    bt = BallTree(X, leaf_size=5)
    # only used to pick a meaningful range of radii
    rad = np.sqrt(((X - query_pt) ** 2).sum(1))

    for r in np.linspace(rad[0], rad[-1], 100):
        # query_radius is documented for 2D input (n_queries, n_features), so
        # the single query point is wrapped; [0] selects its result.
        ind, dist = bt.query_radius([query_pt], r + eps, return_distance=True)

        ind = ind[0]
        dist = dist[0]

        # recompute the distances for the returned neighbours
        d = np.sqrt(((query_pt - X[ind]) ** 2).sum(1))

        assert_array_almost_equal(d, dist)
def _query_radius_iteratively(tree: BallTree, n_original_points: int,
                              points: np.ndarray, cutoff: float,
                              max_points_per_split=10000):
    """Return the unique tree indices that lie within ``cutoff`` of any point.

    The query points are processed in slices so that only the neighbour lists
    of one slice are held in memory at a time.

    Args:
        tree: The tree to query; only ``query_radius`` is used.
        n_original_points: Size of the boolean mask, i.e. the returned indices
            are drawn from ``range(n_original_points)``. Presumably the number
            of points the tree was built from -- verify against the caller.
        points: The query points.
        cutoff: The query radius passed to ``tree.query_radius``.
        max_points_per_split: Upper limit used to derive the number of slices.

    Returns:
        Sorted array of the unique indices found by any of the queries. Empty
        if ``points`` is empty.
    """
    # this method of using a mask rather than concatenating all points then
    # finding the unique values uses ~ 20x less memory! Very cheeky
    npoints = len(points)
    mask = np.full(n_original_points, False)
    idxs = np.arange(n_original_points)

    if npoints == 0:
        # No query points means no neighbours. Returning here also avoids
        # asking gen_even_slices for zero slices, which it rejects.
        return idxs[mask]

    nsplits = math.ceil(npoints / max_points_per_split)
    for split in gen_even_slices(npoints, nsplits):
        query = tree.query_radius(points[split], cutoff)

        # each result holds the neighbour indices of one query point
        for result in query:
            mask[result] = True

    return idxs[mask]
def __init__(self, reciprocal_lattice: Lattice, original_points: np.ndarray,
             original_dim: np.ndarray, extra_points: np.ndarray,
             nworkers: int = pdefaults["nworkers"]):
    """Set up the point sets and index maps needed to compute the volumes.

    Args:
        reciprocal_lattice: Lattice used to convert points to cartesian
            coordinates (``get_cartesian_coords``) and to derive the grid
            spacing (``matrix``).
        original_points: The points of the regular mesh. Presumably in
            fractional coordinates -- verify against the caller.
        original_dim: The mesh dimensions; ``1 / original_dim`` dotted with
            the lattice matrix gives the regular grid spacing.
        extra_points: The additional points added to the regular mesh.
        nworkers: Number of workers; ``-1`` means use ``cpu_count()``.
    """
    if nworkers == -1:
        nworkers = cpu_count()
    self._nworkers = nworkers

    n_original = len(original_points)
    n_extra = len(extra_points)

    supercell = get_supercell_points([2, 2, 2], original_points)

    # Work in cartesian space so that the cutoff region is a sphere even when
    # the reciprocal lattice is not cubic; a fractional cutoff would not be
    # spherical.
    supercell_cart = reciprocal_lattice.get_cartesian_coords(supercell)
    extra_cart = reciprocal_lattice.get_cartesian_coords(extra_points)

    # The small cutoff is slightly larger than the largest regular grid
    # spacing, so at least one neighbouring point is always included in each
    # direction. The big cutoff is twice the small one.
    grid_spacing = np.dot(1 / original_dim, reciprocal_lattice.matrix)
    small_cutoff = np.max(grid_spacing) * 1.01
    big_cutoff = small_cutoff * 2

    # BallTree gives fast radius queries around the extra points.
    tree = BallTree(supercell_cart)

    # Supercell points surrounding the extra points within the big cutoff
    # (the extra points themselves are not part of the tree).
    big_neighbours = tree.query_radius(extra_cart, big_cutoff)
    big_idx = np.concatenate(big_neighbours, axis=0)

    # The Voronoi diagram is built from the big cutoff points followed by the
    # extra points.
    self._voronoi_points = np.concatenate((supercell[big_idx], extra_points))

    # Points of the regular mesh whose volumes must be recomputed. Outside the
    # small cutoff the weight stays the regular grid weight. The extra points
    # are not included in these indices.
    small_neighbours = tree.query_radius(extra_cart, small_cutoff)
    small_idx = np.concatenate(small_neighbours, axis=0)

    # Positions of the small cutoff points inside the Voronoi point array.
    small_in_voronoi = _get_loc(big_idx, small_idx)

    extra_range = np.arange(n_extra)

    # Voronoi indices to read volumes from: the small cutoff points plus the
    # extra points, which sit directly after the big cutoff points.
    self._volume_points_idx = np.concatenate(
        (small_in_voronoi, extra_range + len(big_idx)))

    # Map the small cutoff points back onto the original mesh. This relies on
    # the supercell repeating the original mesh in the same order for every
    # cell of the supercell.
    small_in_original = small_idx % n_original

    # Destination indices in the final volume array, which is ordered as the
    # original mesh followed by the extra points.
    self._volume_in_final_idx = np.concatenate(
        (small_in_original, extra_range + n_original))

    # Pre-populate the final volumes: every mesh point starts with the regular
    # mesh volume and every extra point with zero. This array will be updated
    # by compute_volumes.
    final_volumes = np.full(n_original + n_extra, 1 / n_original)
    final_volumes[n_original:] = 0
    self._final_volumes = final_volumes