def trim(self, points, radius):
    """
    Remove points that lie too close to the cut curve.

    Such points do not add anything and only lead to awkward faces.

    Parameters
    ----------
    points : ndarray
        Candidate points. Indexed with a boolean mask, so it must be an
        array rather than a plain list.
    radius : float
        Exclusion distance used both around the curve segments and around
        the curve vertices.

    Returns
    -------
    ndarray
        The rows of ``points`` that were not eliminated.
    """
    # some precomputations
    tree = KDTree(points)
    # presumably each row of self.faces holds the two vertex indices of a segment
    cp = self.vertices[self.faces]
    normal = util.normalize(np.cross(cp[:, 0], cp[:, 1]))
    mid = util.normalize(cp.sum(axis=1))
    diff = np.diff(cp, axis=1)[:, 0, :]
    # coarse query radius per segment: half the segment length combined
    # with the exclusion radius
    edge_radius = np.sqrt(util.dot(diff, diff) / 4 + radius ** 2)

    # np.bool was removed from NumPy; the builtin bool is the same dtype
    index = np.ones(len(points), dtype=bool)

    # eliminate near edges
    def near_edge(e, p):
        # distance of point p from the plane through mid[e] with normal[e]
        return np.abs(np.dot(points[p] - mid[e], normal[e])) < radius

    # zip works on both Python 2 and 3 (izip is Python 2 only)
    for i, (p, r) in enumerate(zip(mid, edge_radius)):
        coarse = tree.query_ball_point(p, r)
        index[[c for c in coarse if near_edge(i, c)]] = False

    # eliminate near points
    for p in self.vertices:
        coarse = tree.query_ball_point(p, radius)
        index[coarse] = False

    return points[index]
def rho_delta(x, gk):
    """Compute a local density (rho) and a separation (delta) per sample.

    rho[i] is the number of samples within distance ``gk`` of sample i
    (the sample itself included).  delta[i] is the distance from sample i
    to the closest sample that ranks before it in the density ordering,
    searched within ``3 * gk`` and set to ``5 * gk`` when none is found.
    The top-ranked sample receives the largest delta of all samples.

    Parameters
    ----------
    x : ndarray, shape (m, d)
        Sample coordinates.
    gk : float
        Base radius for the neighbourhood queries.

    Returns
    -------
    rho, delta : ndarray of float16, shape (m,)
        Both are expressed in the original sample order.
    """
    n_samples = x.shape[0]
    tree = Tree(x)

    # density: neighbour count inside radius gk
    rho = np.zeros(n_samples, dtype=np.float16)
    for row in range(n_samples):
        rho[row] = len(tree.query_ball_point(x[row, :], gk))
    order = np.argsort(rho)[::-1]

    # work in density-ranked order so "ranks before" is simply "index < rank"
    ranked = x[order, :]
    ranked_tree = Tree(ranked)
    ranked_delta = np.zeros(n_samples, dtype=np.float16)
    for rank in np.arange(1, n_samples):
        nearby = np.array(
            ranked_tree.query_ball_point(ranked[rank, :], 3 * gk))
        denser = nearby[nearby < rank]
        nearest = 5 * gk
        if len(denser):
            offsets = ranked[denser] - ranked[rank]
            gap = np.sqrt(np.min(np.sum(offsets ** 2, axis=1)))
            if gap < nearest:
                nearest = gap
        ranked_delta[rank] = nearest
    ranked_delta[0] = np.max(ranked_delta)

    # scatter the ranked values back to the original sample order
    delta = np.zeros(n_samples, dtype=np.float16)
    delta[order] = ranked_delta
    return rho, delta
def update(self, d_t: float, position_kd_tree: cKDTree) -> np.ndarray:
    """Advance the signal by one time step and report newly reached cells.

    The cells inside the current signal radius are looked up first.  While
    the diameter is still below ``max_diameter`` it is grown by
    ``d_t * speed`` and the cells inside the enlarged radius that were not
    inside the previous one are returned.  Otherwise the signal is marked
    inactive, ``last_signaled`` is set to the current clock and an empty
    array is returned.

    Args:
        d_t (float): The time step to evaluate.
        position_kd_tree (cKDTree): Tree built over the cell positions.

    Returns:
        np.ndarray: Indices into the tree of the cells that received the
        signal during this step.
    """
    reached_before = position_kd_tree.query_ball_point(
        self.position, self.diameter / 2)

    if not self.diameter < self.max_diameter:
        # the signal has reached its maximum extent and dies
        self.is_active = False
        self.last_signaled = self.clock
        return np.asarray([])

    self.diameter = self.diameter + d_t * self.speed
    reached_now = position_kd_tree.query_ball_point(
        self.position, self.diameter / 2)
    return np.setdiff1d(reached_now, reached_before)
def group_vectors(vectors, angle=np.radians(10), include_negative=False):
    '''
    Group vectors based on an angle tolerance, with the option to
    treat negated vectors as belonging to the same group.

    This is similar to group_rows(stack_negative(rows)); the main
    difference is that the angle can be much looser because actual
    distance queries are performed on the unit vectors.

    Parameters
    ----------
    vectors : (n, d) float
        Input vectors; they are normalised with ``unitize``.
    angle : float
        Tolerance in radians.  ``tan(angle)`` is used as the search
        radius between unit vectors.
    include_negative : bool
        If True, vectors within tolerance of the negated seed vector are
        added to the group (flipped before taking the median).

    Returns
    -------
    unique_vectors : (m, d) float
        Component-wise median of every group.
    aligned_index : (m,) sequence
        For every group, the indices (mapped through the valid mask
        returned by ``unitize``) of its members.  A 2D integer array when
        all groups have the same size, otherwise an object array holding
        one index array per group.
    '''
    dist_max = np.tan(angle)
    unit_vectors, valid = unitize(vectors, check_valid=True)
    valid_index = np.nonzero(valid)[0]
    # np.bool was removed from NumPy; the builtin bool is the same dtype
    consumed = np.zeros(len(unit_vectors), dtype=bool)
    tree = KDTree(unit_vectors)
    unique_vectors = []
    aligned_index = []

    for index, vector in enumerate(unit_vectors):
        if consumed[index]:
            continue
        aligned = np.array(tree.query_ball_point(vector, dist_max),
                           dtype=int)
        members = unit_vectors[aligned]
        if include_negative:
            aligned_neg = np.array(
                tree.query_ball_point(-1.0 * vector, dist_max), dtype=int)
            members = np.vstack((members, -unit_vectors[aligned_neg]))
            aligned = np.append(aligned, aligned_neg)
        consumed[aligned] = True
        unique_vectors.append(np.median(members, axis=0))
        aligned_index.append(valid_index[aligned])

    # np.array() on groups of different sizes raises on recent NumPy;
    # build the object array explicitly in that case
    if len({len(group) for group in aligned_index}) <= 1:
        groups = np.array(aligned_index)
    else:
        groups = np.empty(len(aligned_index), dtype=object)
        for position, group in enumerate(aligned_index):
            groups[position] = group
    return np.array(unique_vectors), groups
def group_vectors(vectors, angle=np.radians(10), include_negative=False):
    '''
    Group vectors based on an angle tolerance, with the option to
    treat negated vectors as belonging to the same group.

    This is similar to group_rows(stack_negative(rows)); the main
    difference is that the angle can be much looser because actual
    distance queries are performed on the unit vectors.

    Parameters
    ----------
    vectors : (n, d) float
        Input vectors; they are normalised with ``unitize``.
    angle : float
        Tolerance in radians.  ``tan(angle)`` is used as the search
        radius between unit vectors.
    include_negative : bool
        If True, vectors within tolerance of the negated seed vector are
        added to the group (flipped before taking the median).

    Returns
    -------
    unique_vectors : (m, d) float
        Component-wise median of every group.
    aligned_index : (m,) sequence
        For every group, the indices (mapped through the valid mask
        returned by ``unitize``) of its members.  A 2D integer array when
        all groups have the same size, otherwise an object array holding
        one index array per group.
    '''
    dist_max = np.tan(angle)
    unit_vectors, valid = unitize(vectors, check_valid=True)
    valid_index = np.nonzero(valid)[0]
    # np.bool was removed from NumPy; the builtin bool is the same dtype
    consumed = np.zeros(len(unit_vectors), dtype=bool)
    tree = KDTree(unit_vectors)
    unique_vectors = []
    aligned_index = []

    for index, vector in enumerate(unit_vectors):
        if consumed[index]:
            continue
        aligned = np.array(tree.query_ball_point(vector, dist_max),
                           dtype=int)
        members = unit_vectors[aligned]
        if include_negative:
            aligned_neg = np.array(
                tree.query_ball_point(-1.0 * vector, dist_max), dtype=int)
            members = np.vstack((members, -unit_vectors[aligned_neg]))
            aligned = np.append(aligned, aligned_neg)
        consumed[aligned] = True
        unique_vectors.append(np.median(members, axis=0))
        aligned_index.append(valid_index[aligned])

    # np.array() on groups of different sizes raises on recent NumPy;
    # build the object array explicitly in that case
    if len({len(group) for group in aligned_index}) <= 1:
        groups = np.array(aligned_index)
    else:
        groups = np.empty(len(aligned_index), dtype=object)
        for position, group in enumerate(aligned_index):
            groups[position] = group
    return np.array(unique_vectors), groups
def find_points_in_tiles(tiles, ra, dec, radius=None):
    """Return, for each tile, the indices of the points that fall inside it.

    A tree is built over the points and queried with the tile centres, so
    the function suits many points and comparatively few tiles.

    Parameters
    ----------
    tiles : mapping or record array with 'RA' and 'DEC' entries
        Tile centres.
    ra, dec : 1-D arrays
        Point coordinates; only vector input is supported.
    radius : float, optional
        Tile radius in degrees.  Defaults to
        ``focalplane.get_tile_radius_deg()``.

    Returns
    -------
    The result of ``query_ball_point`` for the tile centres: lists of
    point indices in no particular order (a single list when the tile
    input is scalar).
    """
    from scipy.spatial import cKDTree as KDTree

    if radius is None:
        radius = focalplane.get_tile_radius_deg()

    # indices are only well defined for 1-D coordinate vectors
    assert ra.ndim == 1
    assert dec.ndim == 1

    point_tree = KDTree(_embed_sphere(ra, dec))

    # convert the angular radius into a straight-line (3d) distance
    chord = 2.0 * np.sin(np.radians(radius) * 0.5)

    tile_xyz = _embed_sphere(tiles['RA'], tiles['DEC'])
    return point_tree.query_ball_point(tile_xyz, chord)
def get_neighbors(kd_tree: cKDTree, point_ix: int, radius: float, known=None) -> Tuple[np.ndarray, np.ndarray]:
    """
    Find the neighbours of a tree point inside ``radius`` that are not
    already known, e.g. from an earlier query with a smaller radius.

    Parameters
    ----------
    kd_tree
        Tree holding the point coordinates.
    point_ix
        Index of the query point inside the tree.
    radius
        Search radius.
    known: set or None
        Indices to leave out.  When None only the query point itself is
        left out.

    Returns
    -------
    neigh_ix: Numpy array
        Indices of the new neighbouring points.
    points: Numpy array
        Coordinates of the new neighbouring points.
        Both arrays are empty when no new neighbour exists.
    """
    already_seen = {point_ix} if known is None else known

    in_range = kd_tree.query_ball_point(kd_tree.data[point_ix, :], radius)
    fresh = np.array(list(set(in_range) - set(already_seen)))

    if fresh.size == 0:
        # nothing new: an empty index array cannot be used to index the data
        return np.array([]), np.array([])
    return fresh, kd_tree.data[fresh, :]
def find_neighbor_pixels(pix_x, pix_y, rad):
    """Find the neighbours of every camera pixel with a KD-tree.

    Useful when a neighbour list is not already available in the file.

    Parameters
    ----------
    pix_x : array_like
        x position of each pixel
    pix_y : array_like
        y position of each pixel
    rad : float
        radius inside which a pixel counts as a neighbour; it should be
        slightly larger than the pixel diameter.

    Returns
    -------
    list of lists
        for each pixel, the indices of its neighbours (the pixel itself
        is excluded)
    """
    coords = np.array([pix_x, pix_y]).T
    tree = KDTree(coords)

    neighbors = []
    for pixel_id, coord in zip(np.arange(len(pix_x)), coords):
        nearby = tree.query_ball_point(coord, r=rad)
        nearby.remove(pixel_id)  # the query always finds the pixel itself
        neighbors.append(nearby)
    return neighbors
def query_index_polygon(index, polygon_geometry):
    """Return the (start, end) pairs of all index rows inside a polygon.

    Each index row is unpacked as ``x, y, start, end``.  Candidates are
    first gathered with a circle around the polygon's bounding box and
    then tested against the polygon itself.
    """
    table = np.array(index)
    tree = KDTree(table[:, 0:2])

    # circle centred on the bounding box and passing through its corners,
    # so it encloses the bounding box and therefore the polygon
    region = shape(polygon_geometry)
    minx, miny, maxx, maxy = region.bounds
    half_width = (maxx - minx) / 2
    half_height = (maxy - miny) / 2
    reach = math.sqrt(half_width**2 + half_height**2)
    centre = (minx + half_width, miny + half_height)

    candidates = table[tree.query_ball_point(centre, reach), :]

    # exact test against the polygon
    return [
        (int(start), int(end))
        for px, py, start, end in candidates
        if Point(px, py).intersects(region)
    ]
def remove_close(points, radius):
    """
    Thin out a point set so that no kept point lies within ``radius``
    of another kept point.

    Points are visited in order; a point that has not been consumed by an
    earlier kept point is kept and consumes everything within ``radius``.

    Parameters
    ------------
    points : (n, dimension) float
      Points in space
    radius : float
      Minimum distance between result points

    Returns
    ------------
    culled : (m, dimension) float
      Points in space
    mask : (n,) bool
      Which points from the original set were returned
    """
    from scipy.spatial import cKDTree as KDTree

    points = np.asanyarray(points)
    # np.bool was removed from NumPy; the builtin bool is the same dtype
    unique = np.zeros(len(points), dtype=bool)
    if len(points) == 0:
        # nothing to cull
        return points[unique], unique

    tree = KDTree(points)
    consumed = np.zeros(len(points), dtype=bool)
    for i, point in enumerate(points):
        if consumed[i]:
            continue
        # the query includes the point itself, so it is consumed as well
        consumed[tree.query_ball_point(point, r=radius)] = True
        unique[i] = True

    return points[unique], unique
def find_points_radec(telra, teldec, ra, dec, radius=None):
    """Return the indices of points within a radius of a pointing.

    A tree is built over the points and queried with the single
    (telra, teldec) position.  This is a variant of find_points_in_tiles
    that does not know about tiles.

    Parameters
    ----------
    telra, teldec
        Position to search around.
    ra, dec : 1-D arrays
        Point coordinates; only vector input is supported.
    radius : float, optional
        Search radius in degrees.  Defaults to
        ``focalplane.get_tile_radius_deg()``.

    Returns
    -------
    The result of ``query_ball_point`` for the pointing: indices of the
    points in range, in no particular order.
    """
    from scipy.spatial import cKDTree as KDTree
    import numpy as np

    if radius is None:
        radius = focalplane.get_tile_radius_deg()

    # indices are only well defined for 1-D coordinate vectors
    assert ra.ndim == 1
    assert dec.ndim == 1

    point_tree = KDTree(_embed_sphere(ra, dec))

    # convert the angular radius into a straight-line (3d) distance
    chord = 2.0 * np.sin(np.radians(radius) * 0.5)

    pointing_xyz = _embed_sphere(telra, teldec)
    return point_tree.query_ball_point(pointing_xyz, chord)
def remove_close(points, radius):
    """
    Thin out a point set so that no kept point lies within ``radius``
    of another kept point.

    Points are visited in order; a point that has not been consumed by an
    earlier kept point is kept and consumes everything within ``radius``.

    Parameters
    ------------
    points : (n, dimension) float
      Points in space
    radius : float
      Minimum distance between result points

    Returns
    ------------
    culled : (m, dimension) float
      Points in space
    mask : (n,) bool
      Which points from the original set were returned
    """
    from scipy.spatial import cKDTree as KDTree

    points = np.asanyarray(points)
    # np.bool was removed from NumPy; the builtin bool is the same dtype
    unique = np.zeros(len(points), dtype=bool)
    if len(points) == 0:
        # nothing to cull
        return points[unique], unique

    tree = KDTree(points)
    consumed = np.zeros(len(points), dtype=bool)
    for i, point in enumerate(points):
        if consumed[i]:
            continue
        # the query includes the point itself, so it is consumed as well
        consumed[tree.query_ball_point(point, r=radius)] = True
        unique[i] = True

    return points[unique], unique
def get_patch_kdtree(kdtree: spatial.cKDTree, rng: np.random.RandomState,
                     query_point, patch_radius, points_per_patch, n_jobs):
    """Return exactly ``points_per_patch`` point ids around ``query_point``.

    With ``patch_radius <= 0`` the ``points_per_patch`` nearest neighbours
    are returned.  Otherwise all points within ``patch_radius`` are
    gathered; a surplus is reduced to a random subset drawn with ``rng``
    and a shortfall is padded with ``-1``.

    Parameters
    ----------
    kdtree : scipy.spatial.cKDTree
        Tree over the point cloud.
    rng : np.random.RandomState
        Random source used for sub-sampling.
    query_point : array_like
        A single query position.
    patch_radius : float
        Ball radius; values <= 0 select the k-nearest-neighbour mode.
    points_per_patch : int
        Number of ids to return.
    n_jobs : int
        Degree of parallelism handed to the tree query.

    Returns
    -------
    np.ndarray of int32
        Point ids, ``-1`` marking padding entries.
    """
    def run_query(method, **kwargs):
        # SciPy renamed ``n_jobs`` to ``workers`` and later removed the old
        # keyword; fall back to ``n_jobs`` for old SciPy versions
        try:
            return method(workers=n_jobs, **kwargs)
        except TypeError:
            return method(n_jobs=n_jobs, **kwargs)

    if patch_radius <= 0.0:
        _, patch_pts_ids = run_query(
            kdtree.query, x=query_point, k=points_per_patch)
    else:
        patch_pts_ids = run_query(
            kdtree.query_ball_point, x=query_point, r=patch_radius)
    # atleast_1d: a k=1 nearest-neighbour query yields a scalar
    patch_pts_ids = np.atleast_1d(np.array(patch_pts_ids, dtype=np.int32))
    point_count = patch_pts_ids.shape[0]

    # if there are too many neighbors, pick a random subset
    if point_count > points_per_patch:
        patch_pts_ids = patch_pts_ids[rng.choice(
            np.arange(point_count), points_per_patch, replace=False)]

    # if there are too few neighbors, pad with -1
    if point_count < points_per_patch:
        padding = np.full(points_per_patch - point_count, -1, dtype=np.int32)
        patch_pts_ids = np.concatenate((patch_pts_ids, padding), axis=0)

    return patch_pts_ids
def filter_neighbours(self, num_neighbours=1, elements=None, cutoff=6.0):
    """Remove atoms that have too few neighbours within the cutoff.

    An atom is kept when at least ``num_neighbours`` atoms inside
    ``cutoff`` have a symbol contained in ``elements``.  The atom itself
    is part of the query result and is counted if its symbol matches.
    Afterwards the mesh and the surface mesh are rebuilt.

    :param int num_neighbours: Number of required neighbours
    :param elements: Which elements are considered part of the cluster
    :type elements: list of strings
    :param float cutoff: Cut-off radius in angstrom
    :raises TypeError: if ``elements`` is None
    """
    from scipy.spatial import cKDTree as KDTree

    if elements is None:
        raise TypeError("No elements given!")

    message = ("Filtering out atoms with less than {} elements "
               "of type {} within {} angstrom")
    print(message.format(num_neighbours, elements, cutoff))

    tree = KDTree(self.cluster.get_positions())

    def has_enough_neighbours(atom):
        nearby = tree.query_ball_point(atom.position, cutoff)
        matching = sum(
            1 for neigh in nearby if self.cluster[neigh].symbol in elements)
        return matching >= num_neighbours

    keep = [atom.index for atom in self.cluster
            if has_enough_neighbours(atom)]
    self.cluster = self.cluster[keep]

    # the cluster changed, so everything has to be remeshed
    self.mesh = self._mesh()
    self.surf_mesh = self._extract_surface_mesh(self.mesh)
def _findNeighbours(self, xPos, yPos, radius): ''' use a KD-Tree to quickly find nearest neighbours (e.g., of the pixels in a camera or mirror facets) Parameters ---------- xPos : array_like x position of each e.g., pixel yPos : array_like y position of each e.g., pixel radius : float radius to consider neighbour it should be slightly larger than the pixel diameter or mirror facet. Returns ------- neighbours: array_like Array of neighbour indices in a list for each e.g., pixel ''' points = np.array([xPos, yPos]).T indices = np.arange(len(xPos)) kdtree = KDTree(points) neighbours = [kdtree.query_ball_point(p, r=radius) for p in points] for neighbourNow, indexNow in zip(neighbours, indices): neighbourNow.remove( indexNow) # get rid of the pixel or mirror itself return neighbours
def _find_neighbor_pixels(pix_x, pix_y, rad): """use a KD-Tree to quickly find nearest neighbors of the pixels in a camera. This function can be used to find the neighbor pixels if they are not already present in a camera geometry file. Parameters ---------- pix_x : array_like x position of each pixel pix_y : array_like y position of each pixels rad : float radius to consider neighbor it should be slightly larger than the pixel diameter. Returns ------- array of neighbor indices in a list for each pixel """ points = np.array([pix_x, pix_y]).T indices = np.arange(len(pix_x)) kdtree = KDTree(points) neighbors = [kdtree.query_ball_point(p, r=rad) for p in points] for nn, ii in zip(neighbors, indices): nn.remove(ii) # get rid of the pixel itself return neighbors
class smoothKDtree:
    """Smooth model with nearest neighbours.

    Stores values ``z`` attached to locations ``X`` and evaluates, for a
    query location, the geometric mean (mean taken in log10 space) of all
    stored values within a given distance.
    """

    def __init__(self, X, z, leafsize=3):
        """Build the tree over ``X`` and remember the values ``z``."""
        assert len(X) == len(z), "len(X) %d != len(z) %d" % (len(X), len(z))
        self.tree = KDTree(X, leafsize=leafsize)
        self.z = z
        self.wn = 0
        self.wsum = None

    def __call__(self, q, eps=0, p=3, distance=500):
        """Evaluate the model at one query point or at a sequence of them.

        Parameters
        ----------
        q : array_like
            A single point (1-D) or several points (2-D).
        eps, p
            Passed on to ``query_ball_point``.
        distance : float
            Ball radius; also stored on the instance as ``self.distance``.

        Returns
        -------
        One value for a 1-D query, otherwise one value per query point.
        """
        q = np.asarray(q)
        qdim = q.ndim
        queries = np.array([q]) if qdim == 1 else q
        self.distance = distance

        result = np.zeros((len(queries), ) + np.shape(self.z[0]))
        for row, query in enumerate(queries):
            members = self.tree.query_ball_point(
                query, eps=eps, p=p, r=distance)
            # averaging in log10 space gives the geometric mean
            result[row] = np.power(10, np.mean(np.log10(self.z[members])))

        return result if qdim > 1 else result[0]
def find_tiles_over_point(tiles, ra, dec, radius=None):
    """Return, for each point, the indices of the tiles that cover it.

    A tree is built over the tile centres and queried with the points.

    Parameters
    ----------
    tiles : mapping or record array with 'RA' and 'DEC' entries
        Tile centres.
    ra, dec
        Point coordinates.
    radius : float, optional
        Tile radius in degrees.  Defaults to
        ``focalplane.get_tile_radius_deg()``.

    Returns
    -------
    The result of ``query_ball_point`` for the points: lists of tile
    indices in no particular order (a single list when ra, dec are
    scalars).
    """
    from scipy.spatial import cKDTree as KDTree

    if radius is None:
        radius = focalplane.get_tile_radius_deg()

    tile_tree = KDTree(_embed_sphere(tiles['RA'], tiles['DEC']))

    # convert the angular radius into a straight-line (3d) distance
    chord = 2.0 * np.sin(np.radians(radius) * 0.5)

    point_xyz = _embed_sphere(ra, dec)
    return tile_tree.query_ball_point(point_xyz, chord)
def query_index(index, x, y, radius=1000.):
    """Look up the index rows located within ``radius`` of (x, y).

    :param index: Table with the columns: x y begin end
    :type index: np.array
    :param x: X coordinate of the search centre
    :type x: float
    :param y: Y coordinate of the search centre
    :type y: float
    :param radius: Search radius (m). Defaults to 1000.
    :type radius: float
    :return: 2d integer array with one begin/end row per location found.
    :rtype: np.array
    """
    table = np.array(index)

    # spatial lookup over the coordinate columns only
    locations = KDTree(table[:, 0:2])
    hits = locations.query_ball_point((x, y), radius)

    # begin/end columns of the matching rows
    return table[hits, 2:4].astype(np.int64)
def group_distance(values, distance):
    """
    Greedily group points that lie within ``distance`` of a seed point.

    Points are visited in order; every point not yet consumed becomes a
    seed and its group is everything within ``distance`` of it.

    Parameters
    ---------
    values : (n, d) float
        Points of dimension d
    distance : float
        Maximum distance between a seed and the members of its group

    Returns
    ----------
    unique : (m, d) float
        Component-wise median of each group
    groups : (m,) sequence
        Indexes into ``values`` for each group.  A 2D integer array when
        all groups have the same size, otherwise an object array holding
        one index array per group.
    """
    values = np.asanyarray(values, dtype=np.float64)
    # np.bool / np.int were removed from NumPy; use the builtins
    consumed = np.zeros(len(values), dtype=bool)
    tree = KDTree(values)

    # (m, d) representative value per group
    unique = []
    # (m) sequence of indices into values
    groups = []

    for index, value in enumerate(values):
        if consumed[index]:
            continue
        group = np.array(tree.query_ball_point(value, distance), dtype=int)
        consumed[group] = True
        unique.append(np.median(values[group], axis=0))
        groups.append(group)

    # np.array() on groups of different sizes raises on recent NumPy;
    # build the object array explicitly in that case
    if len({len(group) for group in groups}) <= 1:
        stacked = np.array(groups)
    else:
        stacked = np.empty(len(groups), dtype=object)
        for position, group in enumerate(groups):
            stacked[position] = group
    return np.array(unique), stacked
def analyse(points, radii):
    """Plot mean and maximum neighbour counts against the query radius.

    Only the first 1024 points are used.  They are rescaled in place by
    ``2 / closest_neighbors(points)``; because the slice is taken before
    the in-place scaling, the caller's array is modified when ``points``
    is a NumPy array.  The result is shown on log-log axes together with
    ``approx_max_neighbors(radii, 2)``.
    """
    points = points[:1024]
    points *= 2 / closest_neighbors(points)

    # one query at the largest radius; smaller radii are counted from the
    # cached distances
    neighbourhoods = KDTree(points).query_ball_point(points, np.max(radii))
    dists = [
        np.linalg.norm(points[members] - points[row], axis=-1)
        for row, members in enumerate(neighbourhoods)
    ]
    del neighbourhoods

    means, maxs = [], []
    for r in radii:
        counts = np.array([np.count_nonzero(d < r) for d in dists])
        means.append(np.mean(counts))
        maxs.append(np.max(counts))

    ax = plt.gca()
    ax.plot(radii, means)
    ax.plot(radii, maxs)
    ax.plot(radii, approx_max_neighbors(radii, 2))
    ax.set_xscale('log')
    ax.set_yscale('log')
    plt.legend(['mean', 'max', '2D approx'])
    ax.set_xlabel('$r$')
    ax.set_ylabel('$n$')
    plt.show()
def cull_dataset(outdir, field_ra, field_dec, table):
    """
    Remove sources that are too extended or have a bright close neighbor.

    All neighbors within 0.01 degrees are found with
    kdt.query_ball_point, using the radius d that is the cartesian
    distance equivalent to a 0.01 degree separation:

        ra1, ra2 = 0, 0.01
        dec1, dec2 = 0, 0
        c1 = spherical_to_cartesian(ra1, dec1)
        c2 = spherical_to_cartesian(ra2, dec2)
        d = np.sqrt(sum( [ (c1[i] - c2[i])**2 for i in range(3) ] ))

    A source is removed when any neighbor within that distance is
    brighter than 2 magnitudes fainter than the source itself
    (``V_neighbor - V_source < 2``).

    Before that, rows with V == 30.0 or K == 30.0 are dropped and the
    Wang et al. 2012 relations (via ``get_ang_size``) are used to remove
    sources whose ``theta`` is not below 0.01 arcsec.

    Returns a pandas DataFrame object containing the culled dataset, or
    None when no source survives the angular size cut.
    """
    good = (table.array['V'] != 30.0) & (table.array['K'] != 30.0)
    arr = table.array[good]
    df = get_ang_size(pd.DataFrame.from_records(arr))

    # ignore sources with theta >= 0.01 arcsec
    df = df[df.theta < 0.01]
    if df.shape[0] == 0:
        return None

    ra, dec, Vmag = [np.array(i) for i in [df.RA, df.DEC, df.V]]
    kdt = KDT(radec_to_coords(ra, dec))
    d = 0.00017453292497790891

    no_neighbors = np.ones(df.shape[0], dtype=bool)
    for i in range(df.shape[0]):
        coords = radec_to_coords(ra[i], dec[i])
        # Drop the query source by index.  The position of the query point
        # in the result is not guaranteed, so slicing off the first entry
        # could leave the source itself in the list and flag it as its own
        # bright neighbor.
        idx = [j for j in kdt.query_ball_point(coords, d)[0] if j != i]
        if not idx:
            continue
        # flag sources that have bright nearby neighbors as bad
        if np.any(Vmag[idx] - Vmag[i] < 2):
            no_neighbors[i] = False

    df = df[no_neighbors]
    log(outdir, field_ra, field_dec, df.shape[0], arr.shape[0])
    return df
class NearestAtoms:
    """Radius lookup of atoms around arbitrary points."""

    def __init__(self, atomic_numbers, atomic_positions):
        """Store the atom data and build a KD-tree over the positions."""
        self.nums = atomic_numbers
        self.positions = atomic_positions
        self.tree = KDTree(self.positions)

    def neighbours(self, pt, r):
        """Return the numbers and positions of all atoms within r of pt."""
        hits = self.tree.query_ball_point(pt, r)
        nearby_nums = self.nums[hits]
        nearby_positions = self.positions[hits]
        return nearby_nums, nearby_positions
def refine_high_density(points, noise_n, rad=15):
    """Return the largest neighbourhood found around the leading points.

    For each of the first ``noise_n`` points, all points within ``rad``
    are collected; the index list of the most populated neighbourhood is
    returned (the first one wins on ties).  An empty list is returned
    when ``noise_n`` is 0.
    """
    tree = Tree(points)
    neighbourhoods = (
        tree.query_ball_point(points[seed], rad)
        for seed in np.arange(noise_n)
    )
    # max() keeps the first of several equally large candidates
    return max(neighbourhoods, key=len, default=[])
def density(x):
    """Derive initial cluster centres from the dense parts of ``x``.

    Samples with more than ``p_t`` neighbours inside radius ``p_r`` are
    kept; each kept sample pushes its current label onto all samples
    within ``p_t`` of it.  The remaining samples are labelled -1.  The
    mean of every label group is returned together with the group sizes.

    Prints a message and raises SystemExit when no more than two
    non-leading label groups are found.

    Returns ``(mu, mu_count)``: a (k, 2) array of centres and the counts
    of all label groups except the first.
    """
    # parameters
    n = x.shape[0]
    p_r = 50  # radius of the density test
    p_t = 5  # neighbour-count threshold AND radius of the label propagation
    data = Tree(x)
    dist = np.zeros(n)
    for i in np.arange(n):
        points = data.query_ball_point(x[i, :], p_r)
        # stored as 1.0 / 0.0 because dist is a float array
        dist[i] = len(points) > p_t
    keep_points = np.where(dist == True)[0]
    remove_points = np.where(dist == False)[0]
    assignment = np.arange(n)
    for n_ in keep_points:
        # decide root
        # (the label may already have been overwritten by an earlier sample)
        root = assignment[n_]
        points = data.query_ball_point(x[n_, :], p_t)
        assignment[points] = root
    assignment[remove_points] = -1
    # sort by label so every label occupies one contiguous slice
    sort_index = np.argsort(assignment)
    assignment = assignment[sort_index]
    sort_points = x[sort_index, :]
    unique_elements, index, mu_count = np.unique(
        assignment, return_counts=True, return_index=True)
    # drops the first label group -- presumably the -1 group; this assumes
    # at least one sample was removed (TODO confirm)
    mu_count = mu_count[1:]
    k = unique_elements.shape[0] - 1
    if k > 2:
        # writes into two columns, so x is assumed to be two-dimensional
        mu = np.zeros((k, 2))
        for i_ in np.arange(index.shape[0] - 2):
            mu[i_, :] = np.mean(
                sort_points[index[i_ + 1]:index[i_ + 2], :], axis=0)
        # the last group runs to the end of the sorted array
        mu[-1, :] = np.mean(sort_points[index[-1]:, :], axis=0)
    else:
        print("Failed Initial Condition")
        raise SystemExit
    return mu, mu_count
def geo_search(self, radius, node=None, center=False, **kwargs):
    '''Return the nodes whose 'pos' attribute lies within *radius* of *node*.

    The search runs on ``self.subgraph(**kwargs)``.  *node* defaults to
    ``self``; it is part of the result only when *center* is true.  An
    empty list is returned when no node carries a 'pos' attribute.
    '''
    node = as_node(self if node is None else node)
    positions = nx.get_node_attributes(self.subgraph(**kwargs), 'pos')
    if not positions:
        return []

    # the tree needs a real sequence, a generator is not accepted
    nodes = tuple(positions.keys())
    coords = tuple(positions.values())
    hits = KDTree(coords).query_ball_point(positions[node], radius)

    if center:
        return [nodes[i] for i in hits]
    return [nodes[i] for i in hits if nodes[i] != node]
def cluster_MV(input, radius, acceptable_points):
    """Cluster 3D points by transitive proximity.

    ``input`` is indexed as ``input[axis][i]`` with ``input.shape[1]``
    points.  Every point collects the points within ``radius`` of it, and
    neighbourhoods that share at least one point are merged (set-merge
    approach, see
    https://stackoverflow.com/questions/4842613/merge-lists-that-share-common-elements).

    Returns a list of clusters, each a list of (x, y, z) tuples, keeping
    only clusters with more than ``acceptable_points`` members.
    """
    coords = [(input[0][i], input[1][i], input[2][i])
              for i in range(input.shape[1])]
    tree = KDTree(coords)

    # neighbourhood (as coordinate tuples) of every point
    pending = [
        [coords[j] for j in tree.query_ball_point(coord, radius)]
        for coord in coords
    ]

    merged = []
    while len(pending) > 0:
        seed = set(pending[0])
        others = pending[1:]
        size_before = -1
        # keep absorbing until a full pass adds nothing to the seed
        while len(seed) > size_before:
            size_before = len(seed)
            disjoint = []
            for group in others:
                if seed.intersection(set(group)):
                    seed |= set(group)
                else:
                    disjoint.append(group)
            others = disjoint
        merged.append(seed)
        pending = others

    return [list(group) for group in merged
            if len(group) > acceptable_points]
def remove_close(points, radius):
    '''
    Thin out a point set so that no kept point lies within radius of
    another kept point.

    Points are visited in order; a point that has not been consumed by an
    earlier kept point is kept and consumes everything within radius.

    Arguments
    ---------
    points: (n, dimension) array of points
    radius: float, minimum distance between result points

    Returns
    ---------
    culled: (m, dimension) array holding the kept points
    '''
    points = np.asanyarray(points)
    # np.bool was removed from NumPy; the builtin bool is the same dtype
    unique = np.zeros(len(points), dtype=bool)
    if len(points) == 0:
        # nothing to cull
        return points[unique]

    tree = KDTree(points)
    consumed = np.zeros(len(points), dtype=bool)
    # enumerate works on Python 2 and 3 (xrange is Python 2 only)
    for i, point in enumerate(points):
        if consumed[i]:
            continue
        # the query includes the point itself, so it is consumed as well
        consumed[tree.query_ball_point(point, r=radius)] = True
        unique[i] = True

    return points[unique]
def get_experimental_results(radii, r0=0.04, num_dims=3, num_samples=100000):
    """Count neighbours around a central sample for a range of radii.

    Uniform random points are drawn in the cube [-2, 2)^num_dims, thinned
    with ``rejection_sample_with_tree`` at ``2 * r0`` and divided by
    ``r0``.  The sample closest to the origin is taken as centre and, for
    every radius in ``radii``, the number of samples strictly closer than
    that radius (the centre included) is counted.

    Returns an integer array with one count per radius.
    """
    raw = (np.random.uniform(size=(num_samples, num_dims)) - 0.5) * 4
    kept = rejection_sample_with_tree(KDTree(raw), 2 * r0)
    samples = raw[kept]
    samples /= r0

    tree = KDTree(samples)
    _, nearest = tree.query(np.zeros(num_dims,), 1)
    center = samples[nearest]

    # one query at the largest radius covers all smaller ones
    in_reach = samples[tree.query_ball_point(center, np.max(radii))]
    dists = np.linalg.norm(in_reach - center, axis=-1)
    return np.array([np.count_nonzero(dists < r) for r in radii])
def remove_close_withfaceid(points, face_index, radius):
    '''
    Thin out a point set so that no kept point lies within radius of
    another kept point, carrying a per-point face index along.

    Points are visited in order; a point that has not been consumed by an
    earlier kept point is kept and consumes everything within radius.

    Arguments
    ---------
    points:     (n, dimension) array of points
    face_index: (n,) array with one entry per point
    radius:     float, minimum distance between result points

    Returns
    ---------
    culled:      (m, dimension) array holding the kept points
    culled_face: (m,) entries of face_index belonging to the kept points
    '''
    from scipy.spatial import cKDTree as KDTree

    points = np.asanyarray(points)
    face_index = np.asanyarray(face_index)
    # np.bool was removed from NumPy; the builtin bool is the same dtype
    unique = np.zeros(len(points), dtype=bool)
    if len(points) == 0:
        # nothing to cull
        return points[unique], face_index[unique]

    tree = KDTree(points)
    consumed = np.zeros(len(points), dtype=bool)
    for i, point in enumerate(points):
        if consumed[i]:
            continue
        # the query includes the point itself, so it is consumed as well
        consumed[tree.query_ball_point(point, r=radius)] = True
        unique[i] = True

    return points[unique], face_index[unique]
def dataavgmap(xy, vr, xyi, radius):
    """Average the samples ``vr`` found around each query location.

    For every row of ``xyi`` the values of ``vr`` belonging to the points
    of ``xy`` within ``radius`` are averaged; locations without any point
    in range get 0.  One progress line is printed per query location.

    Parameters
    ----------
    xy : (n, 2) array_like
        Sample positions.
    vr : sequence of length n
        Sample values.
    xyi : (m, 2) ndarray
        Query positions.
    radius : float
        Averaging radius.

    Returns
    -------
    (m,) float ndarray
        The averaged value per query position.
    """
    tree = KDTree(xy)
    n_query = xyi[:, 0].size
    # a float buffer is required: writing the averages into an integer
    # array would silently truncate them
    vravg = np.zeros(n_query, dtype=float)
    for i in range(n_query):
        npts = tree.query_ball_point([xyi[i, 0], xyi[i, 1]], radius)
        if len(npts) > 0:
            vravg[i] = sum(vr[j] for j in npts) / len(npts)
        print("%-10d xyi: %12.2f %12.2f Points: %5d %10.3f " %
              (i, xyi[i, 0], xyi[i, 1], len(npts), vravg[i]))
    return vravg
def remove_close(points, face_index, radius):
    '''
    Thin out a point set so that no kept point lies within radius of
    another kept point, carrying a per-point face index along.

    Points are visited in order; a point that has not been consumed by an
    earlier kept point is kept and consumes everything within radius.

    Arguments
    ---------
    points:     (n, dimension) array of points
    face_index: (n,) array with one entry per point
    radius:     float, minimum distance between result points

    Returns
    ---------
    culled:      (m, dimension) array holding the kept points
    culled_face: (m,) entries of face_index belonging to the kept points
    '''
    from scipy.spatial import cKDTree as KDTree

    points = np.asanyarray(points)
    face_index = np.asanyarray(face_index)
    # np.bool was removed from NumPy; the builtin bool is the same dtype
    unique = np.zeros(len(points), dtype=bool)
    if len(points) == 0:
        # nothing to cull
        return points[unique], face_index[unique]

    tree = KDTree(points)
    consumed = np.zeros(len(points), dtype=bool)
    for i, point in enumerate(points):
        if consumed[i]:
            continue
        # the query includes the point itself, so it is consumed as well
        consumed[tree.query_ball_point(point, r=radius)] = True
        unique[i] = True

    return points[unique], face_index[unique]
def group_distance(values, distance):
    """
    Greedily group points that lie within ``distance`` of a seed point.

    Points are visited in order; every point not yet consumed becomes a
    seed and its group is everything within ``distance`` of it.

    Parameters
    ---------
    values : (n, d) float
        Points of dimension d
    distance : float
        Maximum distance between a seed and the members of its group

    Returns
    ----------
    unique : (m, d) float
        Component-wise median of each group
    groups : (m,) sequence
        Indexes into ``values`` for each group.  A 2D integer array when
        all groups have the same size, otherwise an object array holding
        one index array per group.
    """
    # accept plain sequences as well as arrays
    values = np.asanyarray(values, dtype=np.float64)
    # np.bool / np.int were removed from NumPy; use the builtins
    consumed = np.zeros(len(values), dtype=bool)
    tree = KDTree(values)

    # (m, d) representative value per group
    unique = []
    # (m) sequence of indices into values
    groups = []

    for index, value in enumerate(values):
        if consumed[index]:
            continue
        group = np.array(tree.query_ball_point(value, distance), dtype=int)
        consumed[group] = True
        unique.append(np.median(values[group], axis=0))
        groups.append(group)

    # np.array() on groups of different sizes raises on recent NumPy;
    # build the object array explicitly in that case
    if len({len(group) for group in groups}) <= 1:
        stacked = np.array(groups)
    else:
        stacked = np.empty(len(groups), dtype=object)
        for position, group in enumerate(groups):
            stacked[position] = group
    return np.array(unique), stacked
def chain_edge_points(pts):
    """Link edge points into chains and return the links as a bidict.

    Every element of ``pts`` is indexed with [0] and [1] for its position,
    read through a ``.g`` attribute (a 2-vector, presumably the gradient --
    TODO confirm) and used as a bidict key, so it must be hashable.

    For each point the nearest neighbour (within distance 2) on either
    side of the point's ``g`` direction is considered; a link
    ``links[a] = b`` is created or replaced when it is shorter than the
    link currently attached to the same endpoint.
    """
    tree = KDTree(np.array([(pt[0], pt[1]) for pt in pts]))

    def dot(p0, p1):
        # 2D dot product
        return p0[0] * p1[0] + p0[1] * p1[1]

    def envec(p0, p1):
        # vector pointing from p0 to p1
        return p1[0] - p0[0], p1[1] - p0[1]

    def perp(p0):
        # p0 rotated by 90 degrees
        return p0[1], -p0[0]

    def dist(p0, p1):
        # Euclidean distance
        return ((p0[0] - p1[0])**2 + (p0[1] - p1[1])**2)**0.5

    links = bidict()
    for e in pts:
        # TODO: the body of this loop can be optimized
        # nhood = [n for n in tree.search_nn_dist(e, 4) if dot(e.g, n.g) > 0]
        idx_nhood = [
            n for n in tree.query_ball_point(np.array([e[0], e[1]]), r=2)
        ]
        nhood = [pts[idx] for idx in idx_nhood]
        # split the neighbourhood by the side of perp(e.g) it lies on;
        # e itself gives a zero dot product and lands in neither list
        nf = [n for n in nhood if dot(envec(e, n), perp(e.g)) > 0]
        nb = [n for n in nhood if dot(envec(e, n), perp(e.g)) < 0]
        if nf:
            nf_dist = [dist(e, n) for n in nf]
            f = nf[nf_dist.index(min(nf_dist))]
            # link e -> f unless f already has a closer predecessor
            if f not in links.inv or dist(e, f) < dist(links.inv[f], f):
                # clear the links occupying either endpoint first
                if f in links.inv:
                    del links.inv[f]
                if e in links:
                    del links[e]
                links[e] = f
        if nb:
            nb_dist = [dist(e, n) for n in nb]
            b = nb[nb_dist.index(min(nb_dist))]
            # link b -> e unless b already has a closer successor
            if b not in links or dist(b, e) < dist(b, links[b]):
                # clear the links occupying either endpoint first
                if b in links:
                    del links[b]
                if e in links.inv:
                    del links.inv[e]
                links[b] = e
    return links
def group_distance(values, distance):
    """
    Greedily group points that lie within ``distance`` of a seed point.

    Points are visited in order; every point not yet consumed becomes a
    seed and its group is everything within ``distance`` of it.

    Parameters
    ---------
    values : (n, d) float
        Points of dimension d
    distance : float
        Maximum distance between a seed and the members of its group

    Returns
    ----------
    unique : (m, d) float
        Component-wise median of each group
    groups : (m,) sequence
        Indexes into ``values`` for each group.  A 2D integer array when
        all groups have the same size, otherwise an object array holding
        one index array per group.
    """
    # accept plain sequences as well as arrays
    values = np.asanyarray(values, dtype=np.float64)
    # np.bool / np.int were removed from NumPy; use the builtins
    consumed = np.zeros(len(values), dtype=bool)
    tree = KDTree(values)

    # (m, d) representative value per group
    unique = []
    # (m) sequence of indices into values
    groups = []

    for index, value in enumerate(values):
        if consumed[index]:
            continue
        group = np.array(tree.query_ball_point(value, distance), dtype=int)
        consumed[group] = True
        unique.append(np.median(values[group], axis=0))
        groups.append(group)

    # np.array() on groups of different sizes raises on recent NumPy;
    # build the object array explicitly in that case
    if len({len(group) for group in groups}) <= 1:
        stacked = np.array(groups)
    else:
        stacked = np.empty(len(groups), dtype=object)
        for position, group in enumerate(groups):
            stacked[position] = group
    return np.array(unique), stacked
def get_rdf(pos, inside, Nbins=250, maxdist=30.0):
    """Radial distribution function, not normalised.

    For each particle tagged as inside, count the particles around it and
    bin them with respect to distance.  The result still needs to be
    normalised by inside.sum() and by density x volume of the spherical
    shell between r and r+maxdist/Nbins.

    - pos is a Nxd array of coordinates, with d the dimension of space
    - inside is a N array of booleans. For example all particles further
      away than maxdist from any edge of the box.
    - Nbins is the number of bins along r
    - maxdist is the maximum distance considered

    Returns an integer array of length Nbins covering [0, maxdist).
    A neighbour found at exactly maxdist lies outside the last bin and is
    not counted.
    """
    g = np.zeros(Nbins, int)
    # spatial indexing
    tree = KDTree(pos, 12)
    for i in np.where(inside)[0]:
        js = np.array(tree.query_ball_point(pos[i], maxdist), dtype=int)
        js = js[js != i]  # do not count the particle itself
        rs = np.sqrt(np.sum((pos[js] - pos[i])**2, -1)) / maxdist * Nbins
        bins = rs.astype(int)
        # the ball query is inclusive, so a distance of exactly maxdist
        # would map to bin Nbins, which is out of range
        np.add.at(g, bins[bins < Nbins], 1)
    return g
def change_rvir_of_lagrangian_regions_only(simulation, factor=1.2):
    """
    Updates a simulation with a higher virial radius definition for the
    lagrangian regions (all halos have their capture radius increased by
    factor).

    The dark matter particles of the end snapshot that lie within the
    enlarged radius of a halo centre are tagged with that halo's number
    (all others with -1), the tags are stored as
    ``dark_matter.lagrangian_regions`` and
    ``simulation.identify_lagrangian_regions()`` is re-run.

    Returns the (modified in place) simulation object.
    """
    dark_matter = simulation.snapshot_end.dark_matter
    boxsize = simulation.snapshot_end.header["BoxSize"]

    # First find the halo centers and radii
    centers, radii = find_all_halo_centers(
        dark_matter.halos,
        dark_matter.coordinates.T,
        boxsize=boxsize,
    )

    # Increase the radii
    radii *= factor

    # Now find the particles that lie within that radii
    # Need to build a tree for the dark matter
    tree = KDTree(dark_matter.coordinates, boxsize=boxsize)

    def particles_within(center, radius):
        # SciPy renamed ``n_jobs`` to ``workers`` and later removed the old
        # keyword; fall back to ``n_jobs`` for old SciPy versions
        try:
            return tree.query_ball_point(x=center, r=radius, workers=-1)
        except TypeError:
            return tree.query_ball_point(x=center, r=radius, n_jobs=-1)

    # Now need to recover the actual halo numbers (-1 means "no halo")
    halos = np.unique(dark_matter.halos[dark_matter.halos != -1])

    new_lagrangian_regions = np.full_like(dark_matter.halos, -1, dtype=int)

    # assumes centers/radii are ordered like the sorted unique halo
    # numbers -- TODO confirm against find_all_halo_centers
    for halo, center, radius in zip(halos, centers, radii):
        # where the balls of two halos overlap, the later halo wins
        new_lagrangian_regions[particles_within(center, radius)] = halo

    # Set this array on the DM particles
    simulation.snapshot_end.dark_matter.lagrangian_regions = \
        new_lagrangian_regions

    # Re-identify the lagrangian regions using the original code
    simulation.identify_lagrangian_regions()

    # Return the now changed object
    return simulation
def init_points(points, merge_distance=20):
    """Merge close-by points into groups and return the group centres.

    Every point that has another point within ``merge_distance`` is joined
    with its nearest such neighbour; groups that become connected this way
    are merged.  Points without a neighbour in range keep the label -1.

    Returns ``(mu, mu_count)``: a (k, 2) array with the mean position of
    each group and the sizes of all label groups except the first.
    """
    # Merging Close-by Points
    p_qt = Tree(points)
    # group label per point; -1 means "not assigned to any group yet"
    taken = -1 * np.ones(points.shape[0], dtype=np.int32)
    for i_, p_ in enumerate(points):
        if i_ % 250000 == 0:
            print('Processing:', i_, ' datapoints')
        idx = p_qt.query_ball_point(p_, merge_distance)
        # the query includes the point itself, so > 1 means a real neighbour
        if len(idx) > 1:
            # reshape(1, 2) fixes the points to two dimensions
            d = cdist(p_.reshape(1, 2), points[idx, :])
            # position (within idx) of the nearest neighbour at non-zero
            # distance
            pp = np.where(d == d[d > 0].min())[1][0]
            if taken[i_] == -1:
                if taken[idx[pp]] == -1:
                    # neither is grouped: start a new group labelled i_
                    taken[idx[pp]] = i_
                    taken[i_] = i_
                else:
                    # join the neighbour's group
                    taken[i_] = taken[idx[pp]]
            elif taken[i_] > -1:
                if taken[idx[pp]] == -1:
                    # pull the neighbour into this point's group
                    taken[idx[pp]] = taken[i_]
                else:
                    # merge centers
                    center1 = taken[idx[pp]]
                    center2 = taken[i_]
                    if not (center1 == center2):
                        taken[taken == center2] = center1
                        taken[center2] = center1
    # Calculating Centers
    # sort by label so every label occupies one contiguous slice
    sort_index = np.argsort(taken)
    taken = taken[sort_index]
    sort_points = points[sort_index, :]
    unique_elements, index, mu_count = np.unique(
        taken, return_counts=True, return_index=True)
    # drops the first label group -- presumably the -1 group; this assumes
    # at least one point stayed unassigned (TODO confirm)
    mu_count = mu_count[1:]
    k = unique_elements.shape[0] - 1
    mu = np.zeros((k, 2))
    for i_ in np.arange(index.shape[0] - 2):
        mu[i_, :] = np.mean(
            sort_points[index[i_ + 1]:index[i_ + 2], :], axis=0)
    # the last group runs to the end of the sorted array
    mu[-1, :] = np.mean(sort_points[index[-1]:, :], axis=0)
    return mu, mu_count
def merge_vertices_kdtree(mesh, max_angle=None):
    '''
    Merges vertices which are identical, AKA within
    Cartesian distance TOL_MERGE of each other.
    Then replaces references in mesh.faces via mesh.update_vertices.

    If max_angle is None, vertex normals won't be looked at.
    If max_angle has a value, vertices will only be considered identical
    if they are within TOL_MERGE of each other, and their normals fall
    into the same group of group_vectors(..., max_angle).

    Performance note:
    cKDTree requires scipy >= .12 for this query type and you
    probably don't want to use plain python KDTree as it is crazy slow
    (~1000x in tests)
    '''
    from scipy.spatial import cKDTree as KDTree

    tree = KDTree(mesh.vertices)
    # np.bool / np.int were removed from NumPy; use the builtins
    used = np.zeros(len(mesh.vertices), dtype=bool)
    inverse = np.arange(len(mesh.vertices), dtype=int)
    unique = deque()

    if max_angle is not None:
        mesh.verify_normals()

    for index, vertex in enumerate(mesh.vertices):
        if used[index]:
            continue

        neighbors = np.array(tree.query_ball_point(vertex, TOL_MERGE))
        # index with the array directly: wrapping it in a list adds an
        # extra dimension on current NumPy
        used[neighbors] = True

        if max_angle is not None:
            # tolerance passed positionally so the call does not depend on
            # the keyword name used by group_vectors
            normals, aligned = group_vectors(
                mesh.vertex_normals[neighbors], max_angle)
            for group in aligned:
                inverse[neighbors[group]] = len(unique)
                unique.append(neighbors[group[0]])
        else:
            # NOTE(review): this branch stores the original vertex index,
            # while the branch above stores the position in ``unique`` --
            # confirm which one mesh.update_vertices expects
            inverse[neighbors] = neighbors[0]
            unique.append(neighbors[0])

    mesh.update_vertices(unique, inverse)
    log.debug('merge_vertices_kdtree reduced vertex count from %i to %i',
              len(used),
              len(unique))
def non_overlapping(positions, radii):
    """Give the mask of non-overlapping particles. Early bird.

    Particles are visited in order; a particle that is still marked good
    invalidates every later-checked particle whose centre is closer to it
    than the sum of both radii.

    Parameters
    ----------
    positions : array-like, shape (n, d)
        Particle centres.
    radii : array-like, shape (n,)
        Particle radii.

    Returns
    -------
    good : ndarray of bool, shape (n,)
        True for the particles that are kept.
    """
    assert len(positions) == len(radii)
    positions = np.asarray(positions)
    radii = np.asarray(radii)
    good = np.ones(len(positions), dtype=bool)
    if len(positions) == 0:
        # nothing to test; also avoids max() of an empty array
        return good

    tree = KDTree(positions)
    rmax = radii.max()
    for i, (p, r) in enumerate(zip(positions, radii)):
        if not good[i]:
            continue
        # coarse search: no particle further away than rmax + r can overlap
        for j in tree.query_ball_point(p, rmax + r):
            if j == i or not good[j]:
                continue
            # the python loop is actually faster than numpy on small(3) arrays
            s = 0.0
            for pd, qd in zip(positions[j], p):
                s += (pd - qd) ** 2
            # compare squared centre distance with squared contact distance
            if s < (radii[j] + r) ** 2:
                good[j] = False
    return good
def refine(self, points):
    """
    refine the contour such as to maintain it as a constrained boundary under triangulation using a convex hull
    this is really the crux of the method pursued in this module
    we need to 'shield off' any points that lie so close to the edge such as to threaten our constrained boundary
    by adding a split at the projection of the point on the line, for all vertices within the swept circle of the edge,
    we may guarantee that a subsequent convex hull of the sphere respects our original boundary

    points: additional points, stacked below self.vertices for the proximity search
    returns: a new Curve in which every edge that has at least one foreign point
        inside its bounding sphere is split once; other edges are copied unchanged
    """
    allpoints = np.vstack((self.vertices, points))
    tree = KDTree(allpoints)

    # end points of every edge
    cp = util.gather(self.faces, self.vertices)
    # normalized midpoint of every edge
    mid = util.normalize(cp.sum(axis=1))
    # half the chord length of every edge: radius of the sphere around mid containing it
    diff = np.diff(cp, axis=1)[:, 0, :]
    radius = np.linalg.norm(diff, axis=1) / 2

    def insertion_point(e, c):
        """calculate insertion point"""
        # least-squares coordinates of point c in terms of the two end points of edge e
        coeff = np.dot(np.linalg.pinv(cp[e].T), allpoints[c])
        coeff = coeff / coeff.sum()
        return coeff[0], np.dot(cp[e].T, coeff)

    # build new curves
    _curve_p = list(self.vertices)
    _curve_idx = []
    for e, (m, r, cidx) in enumerate(zip(mid, radius, self.faces)):
        candidates = [insertion_point(e, v)
                      for v in tree.query_ball_point(m, r)
                      if v not in cidx]
        if not candidates:
            # if edge is not split, just copy it
            _curve_idx.append(cidx)
            continue

        # codepath for use in iterative scheme; only insert the most balanced split;
        # probably makes more awkward ones obsolete anyway
        _, ip = min(candidates, key=lambda x: (x[0] - 0.5) ** 2)
        nidx = len(_curve_p)
        _curve_idx.append((cidx[0], nidx))  # attach on both ends
        _curve_idx.append((nidx, cidx[1]))
        _curve_p.append(ip)                 # append insertion point

    return Curve(_curve_p, _curve_idx)
class KDTreeNodeSearcher(NodeSearcher, SetupMixin):
    """Node searcher that answers radius queries through a KD-tree.

    The tree is rebuilt from the ``coord`` attribute of every node each
    time the ``nodes`` property is assigned.
    """

    __prerequisites__ = ['nodes']

    @property
    def nodes(self):
        """The nodes currently indexed by the tree."""
        return self._nodes

    @nodes.setter
    def nodes(self, nodes):
        self._nodes = nodes
        self.kd_tree = KDTree([each.coord for each in nodes])

    def search_indes(self, x, rad, eps=0):
        """Return the indices of the nodes within ``rad * (1 + eps)`` of ``x``."""
        rad *= (1 + eps)
        found = self.kd_tree.query_ball_point(x, rad)
        return found

    def search(self, x, rad, eps=0):
        """Return the nodes within ``rad * (1 + eps)`` of ``x``."""
        hits = self.search_indes(x, rad, eps)
        pool = self.nodes
        result = []
        for k in hits:
            result.append(pool[k])
        return result
def self_intersect(self):
    """
    Check the curve of arc segments for intersections between its edges.

    Raises an Exception as soon as two edges that do not share a vertex are found to cross.
    Resolving intersections by point insertion would be an alternative,
    but that is unlikely to be of practical use and more likely to be annoying.
    """
    vertices = self.vertices
    faces = self.faces
    tree = KDTree(vertices)

    # end points of every edge, [n, 2, 3]
    cp = util.gather(faces, vertices)
    # normal of the plane through both end points of every edge
    normal = util.normalize(np.cross(cp[:, 0], cp[:, 1]))
    # normalized edge midpoints, [n, 3]
    mid = util.normalize(cp.sum(axis=1))
    # difference between the two end points of every edge, [n, 3]
    diff = np.diff(cp, axis=1)[:, 0, :]
    # radius of a sphere around the midpoint containing the edge, with 1% margin, [n]
    radius = np.linalg.norm(diff, axis=1) / 2 * 1.01

    # FIXME: this can be vectorized by adapting pinv
    projector = []
    for q in np.swapaxes(cp, 1, 2):
        projector.append(np.linalg.pinv(q))

    # intended as: incident[vertex_index] lists the edges touching that vertex
    # NOTE(review): the grouped values are positions in the flattened faces array;
    # confirm they are valid edge indices for faces[ej] below
    incident = npi.group_by(faces.flatten(), np.arange(faces.size))

    def intersect(i, j):
        """test whether the spherical line segments i and j cross"""
        # direction of the intersection of both great circles; its sign is arbitrary
        direction = np.cross(normal[i], normal[j])
        # the direction has to lie within the cone spanned by the end points of both edges
        for e in (i, j):
            if not np.prod(np.dot(projector[e], direction)) > 0:
                return False
        return True

    for ei, (p, r, cidx) in enumerate(zip(mid, radius, faces)):
        # vertices near this edge, excluding its own end points
        nearby = [v for v in tree.query_ball_point(p, r) if v not in cidx]
        candidates = np.unique([ej for v in nearby for ej in incident[v]])
        for ej in candidates:
            # edges that share a vertex merely touch; that does not count
            if len(np.intersect1d(faces[ei], faces[ej])) != 0:
                continue
            if intersect(ei, ej):
                raise Exception('The boundary curves intersect. Check your geometry and try again')
class smoothKDtree:
    """ Smooth model with nearest neighbours

    Interpolates values ``z`` known at positions ``X`` by taking, for every
    query point, the geometric mean (mean in log10 space) of the values of
    all known positions within a given distance.
    """

    def __init__(self, X, z, leafsize=3):
        """
        X: known positions, one row per sample
        z: known values, same length as X (scalar or vector per sample)
        leafsize: leaf size handed to the KD-tree
        """
        assert len(X) == len(z), "len(X) %d != len(z) %d" % (len(X), len(z))
        self.tree = KDTree(X, leafsize=leafsize)  # build the tree
        # as an array, so that z can be indexed with the neighbour lists
        self.z = np.asarray(z)
        self.wn = 0
        self.wsum = None

    def __call__(self, q, eps=0, p=3, distance=500):
        """
        q: one query point, or an array with one query point per row
        eps: approximation factor handed to query_ball_point
        p: order of the Minkowski norm used for the search
        distance: search radius around every query point

        returns the interpolated value for a single query point, or an
        array with one interpolated value per row of q; entries without
        any neighbour within `distance` are NaN
        """
        q = np.asarray(q)
        qdim = q.ndim
        if qdim == 1:
            q = np.array([q])
        self.distance = distance

        interpol = np.zeros((len(q),) + np.shape(self.z[0]))
        for row, v in enumerate(q):
            ix = self.tree.query_ball_point(v, eps=eps, p=p, r=distance)
            if len(ix) == 0:
                # no sample in range: the mean is undefined
                interpol[row] = np.nan
                continue
            # average over the neighbours only (axis 0), so that the
            # components of vector valued z stay separate
            wzlog = np.mean(np.log10(self.z[ix]), axis=0)
            interpol[row] = np.power(10, wzlog)
        return interpol if qdim > 1 else interpol[0]
class KDTreeSupportNodeSearcher(SupportNodeSearcher, SetupMixin):
    """Finds the nodes whose support ball (``coord``, ``radius``) contains a point.

    The KD-tree is queried with one fixed radius, ``loosen``, in the
    infinity norm.  A node whose radius does not exceed ``loosen`` is stored
    as a single key; a larger node is stored as a grid of keys, so several
    keys may refer to the same node.  ``medium_indes`` maps every key back
    to its node index (it is ``None`` when there is one key per node).
    """

    __prerequisites__ = ['nodes']
    __optionals__ = [('node_radiuses', None), ('loosen', None)]
    __after_setup__ = ['build_tree']

    def build_tree(self):
        """Resolve ``node_radiuses`` and ``loosen`` and build the KD-tree.

        ``loosen`` may be ``None`` (estimated from the radii), a number, or a
        callable taking ``(nodes, node_radiuses)``.  Returns ``self``.
        """
        nodes = self.nodes
        if self.node_radiuses is None:
            self.node_radiuses = [node.radius for node in nodes]

        loosen = self.loosen
        if loosen is None:
            self.loosen = self.estimate_loosen(self.node_radiuses)
        elif isinstance(loosen, Number):
            self.loosen = loosen
        else:
            self.loosen = loosen(nodes, self.node_radiuses)

        self.node_coords = np.array([node.coord for node in nodes], dtype=float)
        self._build_kd_tree(self.node_radiuses)
        return self

    def estimate_loosen(self, rads):
        """Default query radius: mean plus one standard deviation of the radii."""
        return np.mean(rads) + np.std(rads)

    def _build_kd_tree(self, rads):
        """Generate the tree keys for every node and build ``self.kd_tree``."""
        keys = []
        medium_indes = []
        loosen = self.loosen
        coords = self.node_coords
        for i, coord in enumerate(coords):
            rad = rads[i]
            if rad <= loosen:
                keys.append(coord)
                medium_indes.append(i)
                continue

            # ceil() may return a float; np.linspace needs an integer count
            alter_num_per_dim = int(ceil(rad / loosen))
            axes = [np.linspace(c - rad + loosen, c + rad - loosen, alter_num_per_dim)
                    for c in coord]
            for alter_coord in itertools.product(*axes):
                keys.append(alter_coord)
                medium_indes.append(i)

        if len(coords) < len(keys):
            self.medium_indes = np.array(medium_indes, dtype=int)
            # generation stamp per node, used to report every node only once per query
            self._indes_generation = np.zeros(len(self.nodes), dtype=int)
            self._current_generation = 1
        else:
            self.medium_indes = None
        self.kd_tree = KDTree(keys)

    def search_indes(self, x):
        """Return the indices of the nodes whose distance to ``x`` is below their radius."""
        r = self.loosen
        indes = self.kd_tree.query_ball_point(x, r, p=float('inf'))
        if self.medium_indes is None:
            return [i for i in indes
                    if np.linalg.norm(self.node_coords[i] - x) < self.node_radiuses[i]]

        indes_generation = self._indes_generation
        self._shift_current_generation()
        medium_indes = self.medium_indes
        current_generation = self._current_generation
        result = []
        for i in indes:
            index = medium_indes[i]
            if indes_generation[index] == current_generation:
                # this node was already handled through another of its keys
                continue
            indes_generation[index] = current_generation
            rad = self.node_radiuses[index]
            coord = self.node_coords[index]
            if np.linalg.norm(x - coord) >= rad:
                continue
            result.append(index)
        return result

    def search_nodes(self, x):
        """Return the nodes whose distance to ``x`` is below their radius."""
        return [self.nodes[i] for i in self.search_indes(x)]

    def _shift_current_generation(self):
        """Advance the generation counter, resetting all stamps on wrap-around."""
        if self._current_generation == sys.maxsize:
            self._current_generation = 1
            self._indes_generation.fill(0)
        else:
            self._current_generation += 1
def cull_dataset(outdir, field_ra, field_dec, table):
    """
    Remove sources that are out of the magnitude range or have too much
    nearby starlight.

    Only sources with an unmasked Vmag and 10 <= Vmag <= 15.5 are kept.
    For each remaining source, the neighbors within cartesian distance d of
    it are found with kdt.query_ball_point, and the fluxes of these neighbors
    are accumulated ("starlight accumulator").  If the magnitude equivalent
    to the summed neighbor flux is brighter (numerically smaller) than the
    Vmag of the source itself, the source is removed from the table.

    The angular size of the sources is currently ignored.

    d is a hard-coded constant; assuming radec_to_coords returns unit
    vectors, it corresponds to a separation of about 20 arcsec and can be
    recomputed for a separation `sep` (in degrees) as:

    ra1, ra2 = 0, sep
    dec1, dec2 = 0, 0
    c1 = spherical_to_cartesian(ra1, dec1)
    c2 = spherical_to_cartesian(ra2, dec2)
    d = np.sqrt(sum( [ (c1[i] - c2[i])**2 for i in range(3) ] ))

    outdir, field_ra and field_dec are only passed on to log().

    Returns a pandas DataFrame object containing the culled dataset, or
    None if no source passes the magnitude selection.
    """
    # right now just ignore angular size and focus only on Vmag
    good = ~table.array['Vmag'].mask
    arr = table.array[good]
    df = pd.DataFrame.from_records(arr)

    # ignore sources with Vmag < 10 or Vmag > 15.5
    df = df[(df.Vmag >= 10) & (df.Vmag <= 15.5)]

    if df.shape[0] == 0:
        return None

    # neighbor code - starlight accumulator as per Amy T.'s email
    ra, dec, Vmag = map(np.array, [df.RAJ2000, df.DEJ2000, df.Vmag])
    kdt = KDT(radec_to_coords(ra, dec))
    d = 9.6962736183922984e-05
    no_neighbors = np.ones(df.shape[0], dtype=bool)
    for i in range(df.shape[0]):
        coords = radec_to_coords(ra[i], dec[i])
        # the [0] assumes coords is a 2-d array holding a single point
        neighbors = kdt.query_ball_point(coords, d)[0]
        # drop the query point itself by its index: the returned indices
        # are not ordered by distance, so it need not come first
        idx = [j for j in neighbors if j != i]
        if len(idx) < 1:
            continue
        # use stellar flux accumulator to flag stars with too much nearby flux
        starlight_sum = np.sum(100 ** (-Vmag[idx] / 5.))
        magnitude_sum = -5 * np.log10(starlight_sum) / 2.
        if magnitude_sum < Vmag[i]:
            no_neighbors[i] = False
    df = df[no_neighbors]

    log(outdir, field_ra, field_dec, df.shape[0], arr.shape[0])
    return df
def get_potential_cells(coors, cmesh, centroids=None, extrapolate=True):
    """
    Find, for each point, the cells that may contain it.

    A cell is a candidate for a point when the point lies within the
    infinity-norm ball around the cell centroid whose radius is the largest
    infinity-norm distance from the centroid to a vertex of that cell.

    Parameters
    ----------
    coors : array
        The physical coordinates of the points.
    cmesh : CMesh instance
        The cmesh defining the cells.
    centroids : array, optional
        The centroids of the cells. If not given, they are obtained from
        `cmesh`.
    extrapolate : bool
        If True, a point without any candidate cell is assigned the cells
        incident to the mesh vertex closest to it.

    Returns
    -------
    potential_cells : array
        The indices of the candidate cells of all points, concatenated.
    offsets : array
        The offsets into `potential_cells` for each point: the candidates
        of a point ``ip`` are
        ``potential_cells[offsets[ip]:offsets[ip+1]]``.
    """
    from scipy.spatial import cKDTree as KDTree

    if centroids is None:
        centroids = cmesh.get_centroids(cmesh.tdim)

    point_tree = KDTree(coors)

    cell_conn = cmesh.get_cell_conn()
    cell_vertices = cell_conn.indices.reshape(cmesh.n_el, -1)
    rays = cmesh.coors[cell_vertices] - centroids[:, None]
    radii = nm.linalg.norm(rays, ord=nm.inf, axis=2).max(axis=1)

    # One independent list of candidate cells per point.
    potential_cells = [[] for _ in range(coors.shape[0])]
    for ic, centroid in enumerate(centroids):
        for ip in point_tree.query_ball_point(centroid, radii[ic], p=nm.inf):
            potential_cells[ip].append(ic)

    # lens[ip + 1] is the number of candidates of point ip; the leading zero
    # turns the cumulative sum below into start offsets.
    lens = nm.array([0] + [len(cells) for cells in potential_cells],
                    dtype=nm.int32)

    if extrapolate:
        # Points without a candidate cell get the cells incident to the
        # closest mesh vertex.
        iin = nm.where(lens[1:] == 0)[0]
        if len(iin):
            vertex_tree = KDTree(cmesh.coors)
            ics = vertex_tree.query(coors[iin])[1]
            cmesh.setup_connectivity(0, cmesh.tdim)
            vertex_conn = cmesh.get_conn(0, cmesh.tdim)
            oo = vertex_conn.offsets
            for ip, ik in zip(iin, ics):
                cells = vertex_conn.indices[oo[ik] : oo[ik + 1]]
                potential_cells[ip] = cells
                lens[ip + 1] = len(cells)

    offsets = nm.cumsum(lens, dtype=nm.int32)
    potential_cells = nm.concatenate(potential_cells).astype(nm.int32)

    return potential_cells, offsets
class Spade:
    """
    Class implementing Peng Qiu's SPADE algorithm, following S8 in the supplemental methods
    of his Nature Paper.

    Only the density dependent downsampling step is implemented here.
    """
    # maximum number of events used to estimate the median nearest-neighbour distance
    nsamples = 2000
    # order p of the Minkowski norm used for all distances
    distance_metric = 1
    # neighbourhood radius for the local density
    distance_threshold = None
    # if distance_threshold is None, then distance_threshold = median_min_dist * alpha
    alpha = 5

    def __init__(self, data, use_KD_tree=True):
        """
        data: array of shape (n_dimensions, n_events); it is stored transposed,
            i.e. with one event per row (assumed to be the format of the Flowdata class)
        use_KD_tree: use a KD-tree for the neighbour searches instead of brute force
        """
        self.data = data.transpose()
        self.use_KD_tree = use_KD_tree
        if self.use_KD_tree:
            self._init_KD_tree()
        else:
            self.kd_tree = None

    def run(self):
        """ Apply SPADE algorithm
        """
        # Step 1: apply density dependent downsampling
        self.estimate_median_dist()
        self.compute_local_density()
        self.downsample()

    def _init_KD_tree(self):
        self.kd_tree = KDTree(self.data)

    def _distances_to_all(self, point):
        """Minkowski distance (order distance_metric) from point to every event."""
        diff = np.abs(self.data - point)
        p = self.distance_metric
        if np.isinf(p):
            return diff.max(axis=1)
        return (diff ** p).sum(axis=1) ** (1.0 / p)

    def estimate_median_dist(self):
        """Estimate the median distance between an event and its nearest neighbour.

        At most nsamples randomly selected events are used.  If distance_threshold
        is not set yet, it is set to alpha times the estimate.

        Returns the estimated median distance.
        """
        n_events = self.data.shape[0]
        if self.nsamples < n_events:
            # Randomly selected indices
            index = np.random.choice(n_events, self.nsamples, replace=False)
        else:
            # fewer events than requested samples: use all of them
            index = np.arange(n_events)
        x = self.data[index, :]

        if self.use_KD_tree:
            # We need to take the first two points (k=2), since distance of the point
            # to itself is zero.
            dist, _ = self.kd_tree.query(x, k=2, p=self.distance_metric)
            dist = dist[:, 1]
        else:
            dist = np.zeros(len(x))
            for j, row in enumerate(x):
                d = self._distances_to_all(row)
                # give infinite distance to the point with itself
                d[index[j]] = float('inf')
                dist[j] = d.min()

        self.median_dist = np.median(dist)
        if self.distance_threshold is None:
            self.distance_threshold = self.alpha * self.median_dist
        return self.median_dist

    def compute_local_density_using_pairs(self):
        """Count the neighbours of every event from the list of all close pairs.

        Alternative to compute_local_density; the result is printed and
        returned, but not stored.  Without a KD-tree all counts are zero.
        """
        local_density = np.zeros(self.data.shape[0])
        if self.use_KD_tree:
            pairs = self.kd_tree.query_pairs(self.distance_threshold, p=self.distance_metric)
            print("Found {} pairs".format(len(pairs)))
            for p in pairs:
                local_density[p[0]] += 1
                local_density[p[1]] += 1
        print(local_density.max())
        return local_density

    def compute_local_density(self):
        """Count, for every event, the other events within distance_threshold.

        The result is stored in self.local_density and returned.
        """
        if self.distance_threshold is None:
            self.estimate_median_dist()
        print(self.distance_threshold)

        n_events = self.data.shape[0]
        local_density = np.zeros(n_events)
        if self.use_KD_tree:
            # Querying event by event seems slightly faster than one query for
            # all events, likely due to decreased memory requirements
            for j in range(n_events):
                index = self.kd_tree.query_ball_point(self.data[j], self.distance_threshold,
                                                      p=self.distance_metric)
                # the event itself is always part of the result
                local_density[j] = len(index) - 1
        else:
            for j in range(n_events):
                within = self._distances_to_all(self.data[j]) <= self.distance_threshold
                local_density[j] = np.count_nonzero(within) - 1

        self.local_density = local_density
        return local_density

    def downsample(self):
        """Density dependent downsampling of the events.

        Events with a local density below outlier_density are dropped, events
        with a density up to and including target_density are all kept, and
        from the denser events a random subset is drawn, each event weighted
        by target_density / local_density.

        The result is stored in self.downsampled_data (one event per row)
        and returned.
        """
        target_density = 10
        outlier_density = 3
        local_density = np.asarray(self.local_density, dtype=float)

        # events that are neither outliers nor in high density regions are kept
        keep = (local_density >= outlier_density) & (local_density <= target_density)
        downsampled_data = self.data[keep, :]

        # events that are in high density regions are kept with probability
        # target_density / local_density
        prob2 = np.where(local_density > target_density,
                         target_density / (local_density + 1e-14),
                         0.0)
        total = prob2.sum()
        if total > 0:
            n_draw = int(np.ceil(total))
            downsample_index = np.random.choice(self.data.shape[0], n_draw,
                                                replace=False, p=prob2 / total)
            # stack along the event axis; np.append without axis would flatten
            downsampled_data = np.concatenate(
                (downsampled_data, self.data[downsample_index, :]), axis=0)

        print(downsampled_data.shape)
        self.downsampled_data = downsampled_data
        return downsampled_data
def spherematch(ra1, dec1, ra2, dec2, tol=None, nnearest=1, threads=1):
    """
    Determines the matches between two catalogues of sources with ra,dec
    coordinates

    Parameters
    ----------
    ra1 : array-like
        Right Ascension in degrees of the first catalog
    dec1 : array-like
        Declination in degrees of the first catalog (shape of array must
        match `ra1`)
    ra2 : array-like
        Right Ascension in degrees of the second catalog
    dec2 : array-like
        Declination in degrees of the second catalog (shape of array must
        match `ra2`)
    tol : float or None, optional
        How close (in degrees) a match has to be to count as a match. If
        None, all nearest neighbors for the first catalog will be returned.
        Required when `nnearest` is 0.
    nnearest : int, optional
        The nth neighbor to find. E.g., 1 for the nearest nearby, 2 for the
        second nearest neighbor, etc. Particularly useful if you want to get
        the nearest *non-self* neighbor of a catalog. To do this, use:
        ``spherematch(ra, dec, ra, dec, nnearest=2)``
        if nnearest==0, all matches within `tol` are returned; a source of
        the first catalog then appears once for each of its matches.
    threads : int, optional
        Passed on to the coordinate conversion and distance helpers.

    Returns
    -------
    idx1 : int array
        Indices into the first catalog of the matches.
    idx2 : int array
        Indices into the second catalog of the matches. Same length as
        `idx1`.
    ds : float array
        Distance (in degrees) between the matches

    Raises
    ------
    ValueError
        If the coordinate shapes of a catalog do not match, or if
        `nnearest` is negative, or 0 without a `tol`.
    """
    # convert arguments into arrays for ease of use
    # (np.asarray only copies when needed; np.array(..., copy=False) raises
    # in NumPy 2 whenever a copy is required, e.g. for list input)
    ra1 = np.asarray(ra1)
    dec1 = np.asarray(dec1)
    ra2 = np.asarray(ra2)
    dec2 = np.asarray(dec2)

    # check to see if arguments are consistent
    if ra1.shape != dec1.shape:
        raise ValueError('ra1 and dec1 do not match!')
    if ra2.shape != dec2.shape:
        raise ValueError('ra2 and dec2 do not match!')

    # convert spherical coordinates into cartesian coordinates
    x1, y1, z1 = _spherical_to_cartesian_fast(ra1.ravel(), dec1.ravel(), threads)

    # this is equivalent to, but faster than just doing np.array([x1, y1, z1])
    coords1 = np.empty((x1.size, 3))
    coords1[:, 0] = x1
    coords1[:, 1] = y1
    coords1[:, 2] = z1

    # convert spherical coordinates into cartesian coordinates
    x2, y2, z2 = _spherical_to_cartesian_fast(ra2.ravel(), dec2.ravel(), threads)

    # this is equivalent to, but faster than just doing np.array([x2, y2, z2])
    coords2 = np.empty((x2.size, 3))
    coords2[:, 0] = x2
    coords2[:, 1] = y2
    coords2[:, 2] = z2

    # create tree structure
    kdt = KDT(coords2)

    if nnearest == 0:
        # all matches within the tolerance
        if tol is None:
            raise ValueError('invalid nnearest ' + str(nnearest))

        # cartesian distance between two points separated by tol degrees
        p1 = np.array([float(c) for c in _spherical_to_cartesian_fast(90, 0, threads)])
        p2 = np.array([float(c) for c in _spherical_to_cartesian_fast(90, tol, threads)])
        r = np.sqrt(((p2 - p1) ** 2).sum())

        # one list of catalog-2 indices per source of catalog 1; flatten
        # them into parallel index arrays
        matches = kdt.query_ball_point(coords1, r)
        counts = [len(found) for found in matches]
        idxs1 = np.repeat(np.arange(ra1.size), counts)
        idxs2 = np.array([j for found in matches for j in found], dtype=int)
        ds = _great_circle_distance_fast(ra1.ravel()[idxs1], dec1.ravel()[idxs1],
                                         ra2.ravel()[idxs2], dec2.ravel()[idxs2],
                                         threads)
        return idxs1, idxs2, ds

    # find neighbors
    if nnearest == 1:
        idxs2 = kdt.query(coords1)[1]
    elif nnearest > 1:
        idxs2 = kdt.query(coords1, nnearest)[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    # calculate distances between matches
    ds = _great_circle_distance_fast(ra1, dec1, ra2[idxs2], dec2[idxs2], threads)

    # if tolerance is None, then all objects will have a match
    idxs1 = np.arange(ra1.size)

    # remove matches that are beyond the tolerance separation
    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
class LocalLinearApproximation(object):
    """Weighted local linear regression through the nearest samples of a function.

    Parameters
    ----------
    theta : array-like, shape (nsamples, ndim)
        Sample locations.
    f_theta : array-like, shape (nsamples, nout)
        Function values at the sample locations.
    nfactor : float, optional
        The number of neighbours used for a fit is
        ``max(int(nfactor * ndef), ndef + 2)``, where ``ndef = ndim + 1``.
        Defaults to ``sqrt(ndim)``.
    """

    def __init__(self, theta, f_theta, nfactor=None):
        self.theta = np.atleast_2d(theta)
        self.f_theta = np.atleast_2d(f_theta)
        self.nsamples, self.ndim = self.theta.shape
        _, self.nout = self.f_theta.shape
        if self.f_theta.shape[0] != self.nsamples:
            raise ValueError("dimension mismatch; there must be a realization "
                             "for every sample")
        self.tree = KDTree(self.theta)

        self.ndef = self.get_ndef(self.ndim)
        if nfactor is None:
            nfactor = np.sqrt(self.ndim)
        self.ntot = max(int(nfactor * self.ndef), self.ndef + 2)

    def evaluate(self, theta, cross_validate=False):
        """Predict the function at ``theta`` from its ``ntot`` nearest samples.

        Returns the prediction (shape ``(nout,)``).  With ``cross_validate``
        a tuple ``(prediction, loo)`` is returned, where ``loo`` has shape
        ``(ntot, nout)`` and row ``i`` is the prediction obtained without
        the ``i``-th neighbour.

        Raises ValueError if ``theta`` has the wrong shape or fewer than
        ``ntot`` samples are available.
        """
        theta = np.atleast_1d(theta)
        if theta.shape != (self.ndim, ):
            raise ValueError("dimension mismatch; theta must have shape {0}"
                             .format((self.ndim, )))

        dists, inds = self.tree.query(theta, self.ntot)
        # the tree pads missing neighbours with infinite distances instead
        # of returning a shorter list
        if len(inds) != self.ntot or not np.all(np.isfinite(dists)):
            raise ValueError("could not construct list of {0} neighbors"
                             .format(self.ntot))

        # Compute the weights: 1 inside rdef, tricube falloff up to rout.
        rout = dists[-1]
        rdef = dists[self.ndef-1]
        span = rout - rdef
        if span > 0:
            weights = (1.0 - ((dists-rdef)/span)**3)**3
            weights *= (dists <= rout) * (dists >= rdef)
        else:
            # all neighbours from rdef on are at the same distance: the
            # falloff is 0/0, so give them full weight
            weights = 1.0 * (dists >= rdef)
        weights += 1.0 * (dists < rdef)

        # Construct the regression matrices.
        A = self.get_design_matrix((self.theta[inds] - theta) / rout)
        AT = A.T
        Aw = A * weights[:, None]
        yw = self.f_theta[inds] * weights[:, None]
        # The regression uses coordinates centred on theta, so the requested
        # point is the origin of the design space.
        Apred = self.get_design_matrix(np.zeros((1, self.ndim)))

        # Evaluate the model at the requested point.
        ATA = np.dot(AT, Aw)
        ATy = np.dot(AT, yw)
        w = np.linalg.solve(ATA, ATy)
        pred = np.dot(Apred, w)[0]
        if not cross_validate:
            return pred

        # Leave-one-out cross validation scheme.
        preds = []
        rng = np.arange(self.ntot)
        for i in rng:
            m = rng != i
            ATA = np.dot(AT[:, m], Aw[m])
            ATy = np.dot(AT[:, m], yw[m])
            w = np.linalg.solve(ATA, ATy)
            preds.append(np.dot(Apred, w))
        return pred, np.concatenate(preds, axis=0)

    def find_refinement_coords(self, theta):
        """Run a bounded L-BFGS-B search around ``theta`` and return its result.

        The search starts at ``theta`` and is bounded to ``theta +- R`` per
        coordinate, where ``R`` is the distance to the ``ntot``-th nearest
        sample.  The cost is the squared distance to the closest sample
        within ``3 R`` of ``theta``, or ``1e12`` outside the ball of radius
        ``R``.
        """
        # NOTE(review): minimizing this cost moves towards existing samples;
        # confirm that this is the intended refinement criterion.
        theta = np.atleast_1d(theta)
        if theta.shape != (self.ndim, ):
            raise ValueError("dimension mismatch; theta must have shape {0}"
                             .format((self.ndim, )))

        dists, _ = self.tree.query(theta, self.ntot)
        R = dists[-1]
        inds = self.tree.query_ball_point(theta, 3*R)
        thetas = self.theta[inds]

        def cost(t):
            if np.sum((t - theta)**2) > R**2:
                return 1e12
            return np.min(np.sum((t[None, :] - thetas)**2, axis=1))

        def grad_cost(t):
            r = t[None, :] - thetas
            i = np.argmin(np.sum(r**2, axis=1))
            return 2 * r[i]

        v = minimize(cost, theta, method="L-BFGS-B", jac=grad_cost,
                     bounds=[(t-R, t+R) for t in theta])
        return v.x

    def get_ndef(self, ndim):
        """Number of coefficients of the linear model in ``ndim`` dimensions."""
        return ndim + 1

    def get_design_matrix(self, x):
        """Append a column of ones (the intercept term) to ``x``."""
        return np.concatenate((x, np.ones((len(x), 1))), axis=1)
class KDTreeSegmentSearcher(SegmentSearcher, SetupMixin):
    """Finds the segments within a given distance of a point using a KD-tree.

    Every segment is represented in the tree by one or more key points on
    it: its midpoint if it is shorter than ``2 * loosen``, evenly spread
    points otherwise.  ``rad_plus`` is the largest distance from a point of
    a segment to its closest key, so a ball query enlarged by ``rad_plus``
    cannot miss a segment.  ``medium_indes`` maps every key back to its
    segment index (it is ``None`` when there is one key per segment).
    """

    __prerequisites__ = ['segments']
    __optionals__ = [('loosen', None)]
    __after_setup__ = ['build_tree']

    def build_tree(self):
        """Resolve ``loosen`` and build the KD-tree.

        ``loosen`` may be ``None`` (estimated from the segment sizes), a
        positive number, or a callable taking the segments.

        Raises ValueError for a non-positive number or any other type.
        """
        segments = self.segments
        loosen = self.loosen
        seg_sizes = [norm(seg.end.coord - seg.start.coord) for seg in segments]
        if loosen is None:
            self.loosen = _estimate_loosen(seg_sizes)
        elif isinstance(loosen, Number):
            if loosen <= 0:
                raise ValueError()
            self.loosen = float(loosen)
        elif isinstance(loosen, Callable):
            self.loosen = loosen(segments)
        else:
            raise ValueError()
        self._gen_kdtree(seg_sizes)

    def _gen_kdtree(self, seg_sizes):
        """Generate the key points of all segments and build ``self.kd_tree``."""
        loosen = self.loosen
        segments = self.segments

        keys = []
        medium_indes = []
        rad_plus = 0
        for i, segment in enumerate(segments):
            seg_size = seg_sizes[i]
            if seg_size < loosen * 2:
                keys.append((segment.start.coord + segment.end.coord) / 2)
                medium_indes.append(i)
                half_seg_size = seg_size / 2
                if half_seg_size > rad_plus:
                    rad_plus = half_seg_size
                continue

            # ceil() may return a float; np.linspace needs an integer count
            new_keys_num = int(ceil(seg_size / loosen / 2))
            # keys at the centres of new_keys_num equal pieces of the segment
            # (1.0 keeps this a true division for an integer count)
            start = 1.0 / new_keys_num / 2
            stop = 1 - start
            ts = np.linspace(start, stop, new_keys_num)
            u = segment.end.coord - segment.start.coord
            half_fake_seg_size = seg_size / new_keys_num / 2
            if half_fake_seg_size > rad_plus:
                rad_plus = half_fake_seg_size
            for t in ts:
                keys.append(segment.start.coord + u * t)
                medium_indes.append(i)

        self.kd_tree = KDTree(keys)
        self.rad_plus = rad_plus
        if len(keys) == len(segments):
            self.medium_indes = None
        else:
            self.medium_indes = np.array(medium_indes, dtype=int)
            # generation stamp per segment, used to report every segment
            # only once per query
            self._indes_generation = np.zeros(len(self.segments), dtype=int)
            self._current_generation = 1

    def rough_search_indes(self, x, rad, eps=0):
        """Return the indices of the segments having a key near ``x``.

        The search radius is ``(rad + rad_plus) * (1 + eps)``; the result
        still has to be filtered by the actual distance to the segments.
        """
        r = (rad + self.rad_plus) * (1 + eps)
        indes = self.kd_tree.query_ball_point(x, r, eps=eps)
        if self.medium_indes is None:
            return indes

        indes_generation = self._indes_generation
        self._shift_current_generation()
        medium_indes = self.medium_indes
        current_generation = self._current_generation
        result = []
        for i in indes:
            index = medium_indes[i]
            if indes_generation[index] == current_generation:
                # this segment was already reported through another key
                continue
            indes_generation[index] = current_generation
            result.append(index)
        return result

    def _shift_current_generation(self):
        """Advance the generation counter, resetting all stamps on wrap-around."""
        if self._current_generation == sys.maxsize:
            self._current_generation = 1
            self._indes_generation.fill(0)
        else:
            self._current_generation += 1

    def search_indes(self, x, rad, eps=0):
        """Return the indices of the segments closer than ``rad * (1 + eps)`` to ``x``."""
        rough_indes = self.rough_search_indes(x, rad, eps)
        segments = self.segments
        r = rad * (1 + eps)
        return [i for i in rough_indes if _distance_to_seg(x, segments[i]) < r]

    def search(self, x, rad, eps=0):
        """Return the segments closer than ``rad * (1 + eps)`` to ``x``."""
        segments = self.segments
        return [segments[i] for i in self.search_indes(x, rad, eps)]
def carte2d_and_z_match(x1, y1, z1, x2, y2, z2, ztol, stol):
    """
    Finds matches in one catalog to another.

    For every object of the first catalog, the objects of the second catalog
    within a distance `stol` in the x-y plane are collected, and the one
    closest in z is taken as the match if it is closer than `ztol` in z.

    Parameters
    ----------
    x1 : array-like
        Cartesian coordinate x of the first catalog
    y1 : array-like
        Cartesian coordinate y of the first catalog (shape of array must match `x1`)
    z1 : array-like
        Cartesian coordinate z of the first catalog (shape of array must match `x1`)
    x2 : array-like
        Cartesian coordinate x of the second catalog
    y2 : array-like
        Cartesian coordinate y of the second catalog (shape of array must match `x2`)
    z2 : array-like
        Cartesian coordinate z of the second catalog (shape of array must match `x2`)
    ztol : float or array-like
        The tolerance in z direction. Its length must match `x1` if it is an array.
    stol : float
        How close in the x-y plane (in the unit of the cartesian coordinate)
        a match has to be to count as a match.

    Returns
    -------
    idx1 : int array
        Indices into the first catalog of the matches.
    idx2 : int array
        Indices into the second catalog of the matches. Same length as `idx1`.
    ds : float array
        Distance in the x-y plane (in the unit of the cartesian coordinate)
        between the matches
    dz : float array
        Distance in z (in the unit of the cartesian coordinate) between the matches

    Raises
    ------
    ValueError
        If the coordinate shapes of a catalog do not match, or if the length
        of `ztol` is neither 1 nor the length of `x1`.
    """
    # sanitize
    # (np.asarray only copies when needed; np.array(..., copy=False) raises
    # in NumPy 2 whenever a copy is required, e.g. for list input)
    x1 = np.asarray(x1)
    y1 = np.asarray(y1)
    z1 = np.asarray(z1)
    x2 = np.asarray(x2)
    y2 = np.asarray(y2)
    z2 = np.asarray(z2)

    # check
    if x1.shape != y1.shape or x1.shape != z1.shape:
        raise ValueError('x1 and y1/z1 do not match!')
    if x2.shape != y2.shape or x2.shape != z2.shape:
        raise ValueError('x2 and y2/z2 do not match!')

    # this is equivalent to, but faster than just doing np.array([x1, y1])
    coords1 = np.empty((x1.size, 2))
    coords1[:, 0] = x1
    coords1[:, 1] = y1

    # this is equivalent to, but faster than just doing np.array([x2, y2])
    coords2 = np.empty((x2.size, 2))
    coords2[:, 0] = x2
    coords2[:, 1] = y2

    # set kdt for coord2
    kdt = KDT(coords2)

    # ---
    # Match using kdt
    # ---
    # find the neighbors within a ball
    idxs2_within_balls = kdt.query_ball_point(coords1, stol)
    # counts within each ball
    n_within_ball = np.array([len(ball) for ball in idxs2_within_balls], dtype=int)
    # find which one has neighbors
    nonzero_within_ball = np.where(n_within_ball > 0)[0]

    # nearest neighbor in z within each non-empty ball
    dz_within_ball = []   # the distance in z
    idxs2 = []            # the index in array2
    for i in nonzero_within_ball:
        candidates = np.asarray(idxs2_within_balls[i], dtype=int)
        dz_candidates = np.abs(z2[candidates] - z1[i])
        nearest = np.argmin(dz_candidates)
        dz_within_ball.append(dz_candidates[nearest])
        idxs2.append(candidates[nearest])

    # index for coord1 - only using the object with non-zero neighbor in the ball
    idxs1 = np.arange(x1.size)[nonzero_within_ball]
    idxs2 = np.array(idxs2, dtype=int)
    dz_within_ball = np.array(dz_within_ball, dtype=float)

    # msk to clean the object with dz > ztol
    ztol = np.array(ztol, ndmin=1)
    if len(ztol) == 1:
        msk = (dz_within_ball < ztol)
    elif len(ztol) == len(x1):
        msk = (dz_within_ball < ztol[nonzero_within_ball])
    else:
        raise ValueError("The length of ztol has to be 1 (float) or as the same as input x1/y1. len(ztol):", len(ztol))

    # only keep the matches which have dz < ztol
    idxs1 = idxs1[msk]
    idxs2 = idxs2[msk]
    ds = np.hypot(x1[idxs1] - x2[idxs2], y1[idxs1] - y2[idxs2])
    dz = dz_within_ball[msk]

    return idxs1, idxs2, ds, dz
def match(x1, y1, m1, x2, y2, m2, dr_tol, dm_tol=None):
    """
    Find matches between two catalogs in position and magnitude.

    No transformations are done; the two catalogs are assumed to already
    be on the same coordinate system and magnitude system.

    For two stars to be matched they must be within a specified radius
    (dr_tol) and, optionally, delta-magnitude (dm_tol). When a star in
    the first catalog has more than one candidate within the tolerances,
    a match is made only if a single candidate is the best in BOTH
    positional offset and delta-magnitude. Otherwise there is a conflict
    and no match is returned for that star.

    Parameters
    ----------
    x1 : array-like
        X coordinate in the first catalog.
    y1 : array-like
        Y coordinate in the first catalog (shape must match `x1`).
    m1 : array-like
        Magnitude in the first catalog. Must have the same shape as `x1`.
    x2 : array-like
        X coordinate in the second catalog.
    y2 : array-like
        Y coordinate in the second catalog (shape must match `x2`).
    m2 : array-like
        Magnitude in the second catalog. Must have the same shape as `x2`.
    dr_tol : float
        How close (in units of the first catalog) a candidate has to be
        to count as a match.
    dm_tol : float or None, optional
        How close in delta-magnitude a candidate has to be to count as a
        match. If None, any delta-magnitude is allowed.

    Returns
    -------
    idx1 : int array
        Indices into the first catalog of the matches, in increasing
        order. Never longer than `x1`/`y1`.
    idx2 : int array
        Indices into the second catalog of the matches, aligned with
        `idx1`. The same second-catalog star may appear more than once;
        such duplicates are only counted and reported, not resolved.
    dr : float array
        Distance between the matches.
    dm : float array
        Delta-mag between the matches (m1 - m2).

    Raises
    ------
    ValueError
        If `x1`/`y1` or `x2`/`y2` have different shapes.

    Notes
    -----
    A one-line summary of the number of duplicated second-catalog
    indices is printed to stdout.
    """
    # np.asarray avoids copies where possible and, unlike
    # np.array(..., copy=False), still accepts lists on NumPy >= 2.
    x1 = np.asarray(x1)
    y1 = np.asarray(y1)
    m1 = np.asarray(m1)
    x2 = np.asarray(x2)
    y2 = np.asarray(y2)
    m2 = np.asarray(m2)

    if x1.shape != y1.shape:
        raise ValueError('x1 and y1 do not match!')
    if x2.shape != y2.shape:
        raise ValueError('x2 and y2 do not match!')

    # Build (N, 2) coordinate pairs; filling a preallocated array is
    # cheaper than stacking and transposing.
    coords1 = np.empty((x1.size, 2))
    coords1[:, 0] = x1
    coords1[:, 1] = y1

    coords2 = np.empty((x2.size, 2))
    coords2[:, 0] = x2
    coords2[:, 1] = y2

    # Placeholders, one slot per catalog-1 star; -1 means "no match".
    idxs1 = np.ones(x1.size, dtype=int) * -1
    idxs2 = np.ones(x1.size, dtype=int) * -1

    if x1.size:
        # For every catalog-1 star, list the catalog-2 stars within dr_tol.
        kdt = KDT(coords2)
        i2_match = kdt.query_ball_point(coords1, dr_tol)
        Nmatch = np.array([len(idxs) for idxs in i2_match], dtype=int)
    else:
        # Nothing to match; skip the tree query entirely.
        i2_match = []
        Nmatch = np.zeros(0, dtype=int)

    # Handle the stars grouped by their number of candidates so that each
    # group forms a rectangular (N, nn) array and can be processed with
    # vectorized numpy operations. Stars with zero candidates keep -1.
    for nn in np.unique(Nmatch[Nmatch > 0]):
        i1_nn = np.where(Nmatch == nn)[0]
        rows = np.arange(len(i1_nn))

        # Candidate catalog-2 indices, shape (N, nn).
        i2_tmp = np.array([i2_match[mm] for mm in i1_nn], dtype=int)

        # Offsets of every candidate from its catalog-1 star. The radial
        # offset must combine both axes (hypot), not just |dx|.
        dr = np.hypot(x1[i1_nn][:, None] - x2[i2_tmp],
                      y1[i1_nn][:, None] - y2[i2_tmp])
        dm = np.abs(m1[i1_nn][:, None] - m2[i2_tmp])

        if dm_tol is not None:
            # Don't even consider candidates that exceed the delta-mag
            # threshold when looking for the best one.
            rejected = dm > dm_tol
            dm_min = np.ma.masked_where(rejected, dm).argmin(axis=1)
            dr_min = np.ma.masked_where(rejected, dr).argmin(axis=1)

            # argmin on a fully masked row still returns an index, so
            # re-check the chosen candidate against the tolerance.
            # Fancy indexing is used instead of np.choose, which limits
            # the number of candidate columns.
            keep = (dm_min == dr_min) & (dm[rows, dm_min] < dm_tol)
        else:
            dm_min = dm.argmin(axis=1)
            dr_min = dr.argmin(axis=1)
            keep = dm_min == dr_min

        # Record only unambiguous matches: the same candidate is closest
        # in both position and magnitude.
        matched = i1_nn[keep]
        idxs1[matched] = matched
        idxs2[matched] = i2_tmp[rows, dr_min][keep]

    # Drop the unmatched placeholders; both arrays stay aligned because a
    # slot is filled in idxs1 exactly when it is filled in idxs2.
    idxs1 = idxs1[idxs1 >= 0]
    idxs2 = idxs2[idxs2 >= 0]

    dr = np.hypot(x1[idxs1] - x2[idxs2], y1[idxs1] - y2[idxs2])
    dm = m1[idxs1] - m2[idxs2]

    # NOTE: catalog-2 stars matched by several catalog-1 stars are only
    # counted and reported here; they are not resolved or removed.
    duplicates = [item for item, count in Counter(idxs2.tolist()).items()
                  if count > 1]
    print('Found {0:d} out of {1:d} duplicates'.format(len(duplicates),
                                                       len(dm)))

    return idxs1, idxs2, dr, dm