class Scan():
    """Point-cloud file wrapper with an optional KD-tree over its points.

    Parameters
    ----------
    filepath : object
        Descriptor exposing a ``filepath`` attribute (handed to ``File``)
        and a ``datetime`` attribute (copied onto the scan).
    skipinterval : int, optional
        Stride used when sub-sampling the x/y/z coordinate arrays.
    buildTree : bool, optional
        When true, a KD-tree is built over the sub-sampled points.

    Attributes
    ----------
    skipinterval : int
        The stride given at construction; reused by :meth:`pointSet`.
    tree : KDTree or None
        The spatial index, or ``None`` when ``buildTree`` was false.
    treeexis : bool
        Whether ``tree`` is available.
    """

    def __init__(self, filepath, skipinterval=1, buildTree=True):
        self.filepath = filepath.filepath
        self.file = File(self.filepath, mode="r")
        self.scale = self.file.header.scale[0]
        self.offset = self.file.header.offset[0]
        # Remember the stride so pointSet() can reproduce the sub-sampling.
        self.skipinterval = skipinterval
        # Always define the tree state so that the query methods can raise
        # the documented ValueError instead of an AttributeError.
        self.tree = None
        self.treeexis = False
        if buildTree:
            self.tree = KDTree(self.pointSet().transpose())
            self.treeexis = True
        self.datetime = filepath.datetime

    def _require_tree(self):
        """Raise ``ValueError`` when no KD-tree has been built."""
        if not self.treeexis:
            raise ValueError("Tree Is Not Built")

    def knear(self, point, k):
        """Return the coordinates of the ``k`` tree points nearest to ``point``.

        Raises
        ------
        ValueError
            If the scan was created without a tree.
        """
        self._require_tree()
        return self.tree.data[self.tree.query(point, k=k)[1]]

    def radialcluster(self, point, radius):
        """Return the tree points within ``radius`` of the point nearest ``point``.

        The query is first snapped to its nearest tree point; the ball search
        is centred on that neighbour rather than on ``point`` itself.

        Raises
        ------
        ValueError
            If the scan was created without a tree.
        """
        self._require_tree()
        neighbor = self.tree.data[self.tree.query(point, k=1)[1]]
        points = self.tree.data[self.tree.query_ball_point(neighbor, radius)]
        return np.array(points)

    def pointSet(self):
        """Return the sub-sampled coordinates stacked as rows x, y, z (3 x N)."""
        step = self.skipinterval
        return np.vstack([
            self.file.x[::step],
            self.file.y[::step],
            self.file.z[::step],
        ])
class scan():
    """KD-tree over every point of a file, time-stamped from the file name.

    The base name of ``filepath`` (extension and underscores removed) is cut
    into two-character groups that are read as integers and passed, in
    order, as year, month, day, hour, minute and second to
    ``datetime.datetime``.

    NOTE(review): the first group is passed as the year unchanged, so a
    two-digit group yields a two-digit year -- confirm this is intended.
    NOTE(review): the file object is only dereferenced, not explicitly
    closed -- confirm whether ``File`` needs an explicit close.
    """

    def __init__(self, filepath):
        self.name = filepath
        self.file = File(filepath, mode="r")
        header = self.file.header
        self.scale = header.scale[0]
        self.offset = header.offset[0]

        xyz = np.vstack([self.file.x, self.file.y, self.file.z])
        self.tree = KDTree(xyz.transpose())

        # Two-character groups: year, month, day, hour, minute, second.
        stem = splitext(basename(filepath))[0].replace("_", "")
        fields = []
        for start in range(0, len(stem), 2):
            fields.append(int(stem[start:start + 2]))
        self.time = datetime.datetime(
            fields[0], fields[1], fields[2],
            fields[3], fields[4], fields[5], 0)

        # The tree holds the coordinates; drop the reference to the file.
        self.file = None

    def NNN(self, point, k):
        """Return the coordinates of the ``k`` tree points nearest to ``point``."""
        _, nearest = self.tree.query(point, k=k)
        return self.tree.data[nearest]

    def radialcluster(self, point, radius):
        """Return the tree points within ``radius`` of the point nearest ``point``."""
        _, nearest = self.tree.query(point, k=1)
        centre = self.tree.data[nearest]
        members = self.tree.query_ball_point(centre, radius)
        return np.array(self.tree.data[members])
def nearest_input_pts(
    in_latlons: ndarray, out_latlons: ndarray, k: int
) -> Tuple[ndarray, ndarray]:
    """
    Look up, for every target (output) point, its k nearest source (input)
    points with a KDtree built in XYZ space.

    Args:
        in_latlons:
            Source grid points' latitude-longitudes (N x 2).
        out_latlons:
            Target grid points' latitude-longitudes (M x 2).
        k:
            Number of points surrounding each output point.

    Return:
        - Distances from target grid point to source grid points (M x K).
        - Indexes of those source points (M x K).
    """

    def _to_xyz(latlons: ndarray) -> ndarray:
        # Columns 0 and 1 are handed to ecef_coords; its three outputs
        # become the columns of the returned coordinate array.
        x, y, z = ecef_coords(latlons[:, 0].flat, latlons[:, 1].flat)
        return np.c_[x, y, z]

    # The tree indexes the source points; the targets are the queries.
    source_tree = KDTree(_to_xyz(in_latlons))
    distances, indexes = source_tree.query(_to_xyz(out_latlons), k)

    # For k=1 the query result is one-dimensional; restore the K axis.
    if distances.ndim == 1:
        distances = distances[:, np.newaxis]
    if indexes.ndim == 1:
        indexes = indexes[:, np.newaxis]
    return distances, indexes
def run(self):
    """
    Compute the density proxy.

    This attaches the following attribute:

    - :attr:`density`

    Attributes
    ----------
    density : array_like, length: :attr:`size`
        a unit-less, proxy density value for each object on the local
        rank. It is the inverse of (d**3 times the product of the BoxSize
        components), where d is the distance returned by the KD-tree
        query for ``k=[8]`` in box-normalised coordinates.
    """
    box = self.attrs['BoxSize']

    # domain decomposition: one slab boundary array per dimension
    Np = split_size_3d(self.comm.size)
    edges = []
    for axis in range(3):
        edges.append(
            numpy.linspace(0, box[axis], Np[axis] + 1, endpoint=True))
    domain = GridND(comm=self.comm, periodic=True, edges=edges)

    # read all positions and exchange them, with a smoothing margin
    pos = self._source.compute(self._source['Position'])
    smoothing = self.attrs['margin'] * self.attrs['meansep']
    layout = domain.decompose(pos, smoothing=smoothing)
    xpos = layout.exchange(pos)

    # wait for scipy 0.19.1
    # the tree is built with a single scalar boxsize, so every side of
    # the box has to be the same length
    assert all(box == box[0])
    xpos[...] /= box
    xpos %= 1

    # KDTree over the wrapped, box-normalised positions
    tree = KDTree(xpos, boxsize=1.0)
    dist, _ = tree.query(xpos, k=[8])
    dist = dist[:, 0]

    # gather back to original root, taking the minimum distance
    dist = layout.gather(dist, mode=numpy.fmin)

    self.density = 1 / (dist**3 * box.prod())
def run(self):
    """
    Compute the density proxy.

    This attaches the following attribute:

    - :attr:`density`

    Attributes
    ----------
    density : array_like, length: :attr:`size`
        a unit-less, proxy density value for each object on the local
        rank, computed as ``1 / (d ** 3 * prod(BoxSize))`` where ``d`` is
        the distance returned by the KD-tree query for ``k=[8]`` on
        positions divided by ``BoxSize``.
    """
    attrs = self.attrs
    boxsize = attrs['BoxSize']

    # do the domain decomposition
    Np = split_size_3d(self.comm.size)
    edges = [
        numpy.linspace(0, boxsize[dim], Np[dim] + 1, endpoint=True)
        for dim in range(3)
    ]
    domain = GridND(comm=self.comm, periodic=True, edges=edges)

    # read all position and exchange
    source = self._source
    pos = source.compute(source['Position'])
    layout = domain.decompose(
        pos, smoothing=attrs['margin'] * attrs['meansep'])
    xpos = layout.exchange(pos)

    # wait for scipy 0.19.1
    # (only a scalar boxsize is passed to the tree, hence the cubic check)
    assert all(boxsize == boxsize[0])
    xpos[...] /= boxsize
    xpos %= 1

    # KDTree: keep only the distances of the query, in their single column
    distances = KDTree(xpos, boxsize=1.0).query(xpos, k=[8])[0]
    distances = distances[:, 0]

    # gather back to original root, taking the minimum distance
    distances = layout.gather(distances, mode=numpy.fmin)

    volume = boxsize.prod()
    self.density = 1 / (distances ** 3 * volume)
class Neighbors:
    """
    Classifier implementing k-Nearest Neighbor Algorithm.

    Parameters
    ----------
    data : array-like, shape (n, k)
        The data points to be indexed. Do not modify this data after
        construction: the tree is built from it once and is not refreshed.
    labels : array
        An array representing labels for the data (only arrays of
        non-negative integers are supported, because voting uses
        ``np.bincount``).
    k : int
        default number of neighbors.
    window_size : float
        the default window size.

    Examples
    --------
    >>> samples = [[0.,0.,1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
    >>> labels = [0,0,1,1]
    >>> neigh = Neighbors(samples, labels=labels)
    >>> neigh.predict([[0,0,0]]).tolist()
    [0]
    """

    def __init__(self, data, labels, k=1, window_size=1.):
        """
        Internally uses scipy.spatial.KDTree for most of its algorithms.
        """
        self.kdtree = KDTree(data, leafsize=20)
        self._k = k
        self.window_size = window_size
        self.points = np.ascontiguousarray(data)  # needed for saving the state
        self.labels = np.asarray(labels)
        self.label_range = [self.labels.min(), self.labels.max()]

    def __getinitargs__(self):
        """
        Returns the constructor arguments that rebuild this neighborhood,
        in the order expected by ``__init__``.
        """
        return (self.points, self.labels, self._k, self.window_size)

    def __reduce__(self):
        """
        Rebuild copies and unpickled objects by calling the constructor.

        ``__getinitargs__`` is only honoured for Python 2 old-style
        classes; without this hook a copied object would have no tree, no
        points and no labels, because ``__getstate__`` is empty.
        """
        return (self.__class__, self.__getinitargs__())

    def __setstate__(self, state):
        # Nothing to restore: the constructor call rebuilds everything.
        pass

    def __getstate__(self):
        # The KD-tree is rebuilt from the constructor arguments instead of
        # being serialized.
        return {}

    def kneighbors(self, data, k=None):
        """
        Finds the K-neighbors of a point.

        Parameters
        ----------
        data : array-like
            The new point, or an array of points (one per row).
        k : int
            Number of neighbors to get (default is the value
            passed to the constructor).

        Returns
        -------
        dist : array
            Array representing the lengths to point.
        ind : array
            Array representing the indices of the nearest points in the
            population matrix.

        Examples
        --------
        In the following example, we construct a Neighbors class from an
        array representing our data set and ask who's the closest point to
        [1,1,1]

        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> labels = [0, 0, 1]
        >>> neigh = Neighbors(samples, labels=labels)
        >>> dist, ind = neigh.kneighbors([1., 1., 1.])
        >>> float(dist), int(ind)
        (0.5, 2)

        The element is at distance 0.5 and is the third element of samples
        (indexes start at 0). You can also query for multiple points:

        >>> dist, ind = neigh.kneighbors([[0., 1., 0.], [1., 0., 1.]])
        >>> ind.tolist()
        [1, 2]
        """
        if k is None:
            k = self._k
        return self.kdtree.query(data, k=k)

    def parzen(self, point, window_size=None):
        """
        Finds the neighbors of a point in a Parzen window.

        Parameters
        ----------
        point : array-like
            The new point (or an array of points).
        window_size : float
            Size of the window, used as the search radius (default is the
            value passed to the constructor).

        Returns
        -------
        ind : list
            Indices of the stored points inside the window, as returned by
            ``KDTree.query_ball_point``. Distances use the Minkowski
            1-norm (``p=1.``).
        """
        if window_size is None:
            window_size = self.window_size
        return self.kdtree.query_ball_point(point, window_size, p=1.)

    def predict(self, data):
        """
        Predict the class labels for the provided data.

        Parameters
        ----------
        data: matrix
            An array representing the test point(s).

        Returns
        -------
        labels: array
            List of class labels (one for each data sample).

        Examples
        --------
        >>> labels = [0,0,1]
        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> neigh = Neighbors(samples, labels=labels)
        >>> int(neigh.predict([.2, .1, .2]))
        0
        >>> neigh.predict([[0., -1., 0.], [3., 2., 0.]]).tolist()
        [0, 1]
        """
        _, ind = self.kneighbors(data)
        labels = self.labels[ind]
        if self._k == 1:
            return labels
        # Vote along the neighbor axis. It is the last axis both for one
        # query point (1-D labels) and for a batch (2-D labels).
        # this is much faster than scipy.stats.mode
        return np.apply_along_axis(lambda x: np.bincount(x).argmax(),
                                   axis=-1,
                                   arr=labels)
def map_functionspaces_between_mesh_and_submesh(functionspace_on_mesh, mesh, functionspace_on_submesh, submesh, global_indices=True):
    """
    Build the two dictionaries relating dofs of a function space on a mesh
    to dofs of the corresponding function space on a submesh.

    Spaces with sub spaces are processed recursively, one sub space at a
    time, and the per-sub-space maps are merged. For a space without sub
    spaces, every submesh cell is paired with its parent mesh cell (through
    ``submesh.submesh_to_mesh_cell_local_indices``) and the dofs of the two
    cells are matched by nearest dof coordinates.

    When ``global_indices`` is true the dofs are converted with
    ``local_to_global_index`` and the maps of all processes are merged by
    broadcasting from every rank in turn; otherwise the process-local maps
    are returned as they are.

    Returns a tuple ``(mesh_dofs_to_submesh_dofs, submesh_dofs_to_mesh_dofs)``.
    """
    mesh_dofs_to_submesh_dofs = dict()
    submesh_dofs_to_mesh_dofs = dict()

    # Spaces with sub spaces: recurse and merge, checking for collisions
    if functionspace_on_mesh.num_sub_spaces() > 0:
        assert functionspace_on_mesh.num_sub_spaces() == functionspace_on_submesh.num_sub_spaces()
        for i in range(functionspace_on_mesh.num_sub_spaces()):
            sub_maps = map_functionspaces_between_mesh_and_submesh(
                functionspace_on_mesh.sub(i), mesh,
                functionspace_on_submesh.sub(i), submesh, global_indices)
            (forward_i, backward_i) = sub_maps
            for (mesh_dof, submesh_dof) in forward_i.items():
                assert mesh_dof not in mesh_dofs_to_submesh_dofs
                assert submesh_dof not in submesh_dofs_to_mesh_dofs
            mesh_dofs_to_submesh_dofs.update(forward_i)
            submesh_dofs_to_mesh_dofs.update(backward_i)
        return (mesh_dofs_to_submesh_dofs, submesh_dofs_to_mesh_dofs)

    # Spaces without sub spaces
    assert functionspace_on_mesh.ufl_element().family() in ("Lagrange", "Discontinuous Lagrange"), "The current implementation has been tested only for Lagrange or Discontinuous Lagrange function spaces"
    assert functionspace_on_submesh.ufl_element().family() in ("Lagrange", "Discontinuous Lagrange"), "The current implementation has been tested only for Lagrange or Discontinuous Lagrange function spaces"
    mesh_element = functionspace_on_mesh.element()
    mesh_dofmap = functionspace_on_mesh.dofmap()
    submesh_element = functionspace_on_submesh.element()
    submesh_dofmap = functionspace_on_submesh.dofmap()

    for submesh_cell in cells(submesh):
        # Dofs and dof coordinates of the submesh cell
        submesh_dof_coordinates = submesh_element.tabulate_dof_coordinates(submesh_cell)
        submesh_cell_dofs = submesh_dofmap.cell_dofs(submesh_cell.index())
        if global_indices:
            submesh_cell_dofs = [
                submesh_dofmap.local_to_global_index(local_dof)
                for local_dof in submesh_cell_dofs
            ]

        # Dofs and dof coordinates of the parent mesh cell
        parent_index = submesh.submesh_to_mesh_cell_local_indices[submesh_cell.index()]
        mesh_cell = Cell(mesh, parent_index)
        mesh_dof_coordinates = mesh_element.tabulate_dof_coordinates(mesh_cell)
        mesh_cell_dofs = mesh_dofmap.cell_dofs(mesh_cell.index())
        if global_indices:
            mesh_cell_dofs = [
                mesh_dofmap.local_to_global_index(local_dof)
                for local_dof in mesh_cell_dofs
            ]

        assert len(submesh_dof_coordinates) == len(mesh_dof_coordinates)
        assert len(submesh_cell_dofs) == len(mesh_cell_dofs)

        # Match each submesh dof with the nearest dof of the mesh cell
        kdtree = KDTree(mesh_dof_coordinates)
        distances, mesh_indices = kdtree.query(submesh_dof_coordinates)
        for (i, submesh_dof) in enumerate(submesh_cell_dofs):
            # the matched coordinates must coincide up to a tolerance
            # relative to the cell size
            assert distances[i] < mesh_cell.h()*1e-5
            mesh_dof = mesh_cell_dofs[mesh_indices[i]]
            # store the pair, or check it agrees with an earlier cell
            known_submesh_dof = mesh_dofs_to_submesh_dofs.setdefault(mesh_dof, submesh_dof)
            assert known_submesh_dof == submesh_dof
            known_mesh_dof = submesh_dofs_to_mesh_dofs.setdefault(submesh_dof, mesh_dof)
            assert known_mesh_dof == mesh_dof

    # Without global indices the local maps are the final result
    if not global_indices:
        return (mesh_dofs_to_submesh_dofs, submesh_dofs_to_mesh_dofs)

    # Broadcast in parallel: every rank contributes its own maps in turn
    mpi_comm = mesh.mpi_comm()
    if not has_pybind11():
        mpi_comm = mpi_comm.tompi4py()
    allgathered_mesh_dofs_to_submesh_dofs = mpi_comm.bcast(mesh_dofs_to_submesh_dofs, root=0)
    allgathered_submesh_dofs_to_mesh_dofs = mpi_comm.bcast(submesh_dofs_to_mesh_dofs, root=0)
    for r in range(1, mpi_comm.size):
        allgathered_mesh_dofs_to_submesh_dofs.update(mpi_comm.bcast(mesh_dofs_to_submesh_dofs, root=r))
        allgathered_submesh_dofs_to_mesh_dofs.update(mpi_comm.bcast(submesh_dofs_to_mesh_dofs, root=r))
    return (allgathered_mesh_dofs_to_submesh_dofs, allgathered_submesh_dofs_to_mesh_dofs)
def restriction_map(V, Vb, _all_coords=None, _all_coordsb=None):
    """Return a map between dofs in Vb to dofs in V.

    Vb's mesh should be a submesh of V's Mesh.

    Dofs are matched by coordinates: a dof of Vb is mapped to the dof of V
    whose coordinates lie within 1e-12 of it. Dofs of Vb that find no match
    among the dofs held by this process are broadcast to the other
    processes, each of which searches its own dofs of V.

    The arguments ``_all_coords`` and ``_all_coordsb`` are used by the
    recursive calls on sub spaces to hand down the coordinate tables of the
    parent spaces; callers normally leave them at ``None``.

    Raises RuntimeError for Discontinuous Lagrange spaces of degree > 0.
    """
    if V.ufl_element().family() == "Discontinuous Lagrange" and V.ufl_element().degree() > 0:
        raise RuntimeError(
            "This function does not work for DG-spaces of degree >0 "
            "(several dofs associated with same point in same subspace)."
        )

    if V.ufl_element().family() != "Lagrange":
        cbc_warning("This function is only tested for CG-spaces.")
    assert V.ufl_element().family() == Vb.ufl_element().family(), "ufl elements differ in the two spaces"
    assert V.ufl_element().degree() == Vb.ufl_element().degree(), "ufl elements differ in the two spaces"
    assert V.ufl_element().cell() == Vb.ufl_element().cell(), "ufl elements differ in the two spaces"

    D = V.mesh().geometry().dim()

    # Recursively call this function if V has sub-spaces
    if V.num_sub_spaces() > 0:
        mapping = {}
        if MPI.size(mpi_comm_world()) == 1:
            # In serial the coordinate tables are computed once here and
            # shared with every sub space.
            if _all_coords is None:
                try:
                    # For 1.6.0+ and newer
                    all_coords = V.tabulate_dof_coordinates().reshape(V.dim(), D)
                    all_coordsb = Vb.tabulate_dof_coordinates().reshape(Vb.dim(), D)
                except Exception:
                    # For 1.6.0 and older
                    all_coords = V.dofmap().tabulate_all_coordinates(V.mesh()).reshape(V.dim(), D)
                    all_coordsb = Vb.dofmap().tabulate_all_coordinates(Vb.mesh()).reshape(Vb.dim(), D)
            else:
                all_coords = _all_coords
                all_coordsb = _all_coordsb
        else:
            all_coords = None
            all_coordsb = None
        for i in range(V.num_sub_spaces()):
            mapping.update(
                restriction_map(V.sub(i), Vb.sub(i), all_coords, all_coordsb))

        return mapping

    dm = V.dofmap()
    dmb = Vb.dofmap()

    N = len(dm.dofs())
    Nb = len(dmb.dofs())

    dofs = dm.dofs()

    # Extract coordinates of dofs
    if dm.is_view():
        if _all_coords is not None:
            coords = _all_coords[V.dofmap().dofs()]
        else:
            try:
                # For 1.6.0+ and newer
                coords = V.collapse().tabulate_dof_coordinates().reshape(N, D)
            except Exception:
                # For 1.6.0 and older
                coords = V.collapse().dofmap().tabulate_all_coordinates(V.mesh()).reshape(N, D)

        if _all_coordsb is not None:
            coordsb = _all_coordsb[Vb.dofmap().dofs()]
        else:
            try:
                # For 1.6.0+ and newer
                coordsb = Vb.collapse().tabulate_dof_coordinates().reshape(Nb, D)
            except Exception:
                # For 1.6.0 and older
                coordsb = Vb.collapse().dofmap().tabulate_all_coordinates(Vb.mesh()).reshape(Nb, D)
    else:
        if LooseVersion(dolfin_version()) > LooseVersion("1.6.0"):
            # For 1.6.0+ and newer
            coords = V.tabulate_dof_coordinates().reshape(N, D)
            coordsb = Vb.tabulate_dof_coordinates().reshape(Nb, D)
        else:
            # For 1.6.0 and older
            coords = V.dofmap().tabulate_all_coordinates(V.mesh()).reshape(N, D)
            coordsb = Vb.dofmap().tabulate_all_coordinates(Vb.mesh()).reshape(Nb, D)

    # Build KDTree to compute distances from coordinates in base
    kdtree = KDTree(coords)
    eps = 1e-12

    mapping = {}
    # Each unmatched dof contributes one flat record [subdof, x_0, ..., x_{D-1}].
    # Records are collected in a list and joined once, which avoids
    # re-allocating a growing array on every miss.
    request_records = []

    distances, indices = kdtree.query(coordsb)

    for i, subdof in enumerate(dmb.dofs()):
        # Find closest dof in base
        d, idx = distances[i], indices[i]
        if d < eps:
            # Dof found on this process, add to map
            dof = dofs[idx]
            assert subdof not in mapping
            mapping[subdof] = dof
        else:
            # Search for this dof on other processes
            request_records.append(np.hstack(([subdof], coordsb[i])))

    # The leading empty float array keeps the result a float array even
    # when nothing was missed.
    request_dofs = np.concatenate([np.array([])] + request_records)

    del distances
    del indices

    # Scatter all dofs not found on current process to all processes
    num_processes = MPI.size(mpi_comm_world())
    all_request_dofs = [None] * num_processes
    for j in range(num_processes):
        all_request_dofs[j] = broadcast(request_dofs, j)

    # Re-order all requested dofs
    # Remove items coming from this process
    all_request_dofs[MPI.rank(mpi_comm_world())] = []
    all_request_dofs = np.hstack(all_request_dofs)

    # Integer division: the row count passed to reshape must be an int.
    all_request_dofs = all_request_dofs.reshape(
        len(all_request_dofs) // (D + 1), D + 1)
    all_request_dofs = dict(
        zip(all_request_dofs[:, 0], all_request_dofs[:, 1:]))

    # Search this process for all dofs not found on same process as subdof
    for subdof, coordsbi in all_request_dofs.items():
        # The dof index travelled inside a float array; restore the int.
        subdof = int(subdof)

        # Find closest dof in base
        d, idx = kdtree.query(coordsbi)
        if d < eps:
            # Dof found on this process, add to map
            dof = dofs[idx]
            assert subdof not in mapping
            mapping[subdof] = dof

    return mapping
def prepare_graph(self, pf):
    """
    Prepares the graph from the data stored in the PixelFrame pf.

    Endpoints and junctions are cleaned with their merge radii, indexed in
    one KD-tree each and placed in a common adjacency matrix (endpoints
    first, then junctions). Every pathlet of ``pf`` contributes one
    symmetric entry, weighted with the pathlet length, between the nodes
    nearest to its two sides.

    The results are stored on ``self`` (``data``, ``endpoint_tree``,
    ``junction_tree``, ``endpoint_tree_data``, ``junction_tree_data``,
    ``adjacency``, ``endpoint_shift``, ``junction_shift``,
    ``every_endpoint``, ``every_junction``). Afterwards the adjacency
    matrix is cleaned up, converted to CSR and the derived data generated.

    :param pf: PixelFrame
    :return:
    """
    endpoint_tree_data = clean_by_radius(pf.endpoints, NodeEndpointMergeRadius.value / self.calibration)
    junction_tree_data = clean_by_radius(pf.junctions, NodeJunctionMergeRadius.value / self.calibration)

    e_length = len(endpoint_tree_data)
    j_length = len(junction_tree_data)

    total_length = e_length + j_length

    data = np.r_[endpoint_tree_data, junction_tree_data]

    endpoint_tree_data = data[:e_length]
    junction_tree_data = data[e_length:]

    # a tree only exists when there is at least one node of that kind
    endpoint_tree = KDTree(endpoint_tree_data) if e_length > 0 else None
    junction_tree = KDTree(junction_tree_data) if j_length > 0 else None

    # while ends and junctions need to remain different,
    # they are put in the same graph / adjacency matrix
    # so, first come end nodes, then junction nodes
    # => shifts
    junction_shift = e_length
    endpoint_shift = 0

    adjacency = lil_matrix((total_length, total_length), dtype=float)

    # little bit of nomenclature:
    # a pathlet (pixel graph so to say) is a path on the image
    # its begin is the 'left' l_ side, its end is the 'right' r_ side
    # (not using begin / end not to confuse end with endpoint ...)

    distance_threshold = NodeLookupRadius.value / self.calibration
    cutoff_radius = NodeLookupCutoffRadius.value / self.calibration

    def side_is_end(side):
        # experiment: a side close enough to a known endpoint is an end.
        # Without any endpoints there is no tree to ask, so only the
        # endpoint map decides.
        if endpoint_tree is not None:
            test_distance, _ = endpoint_tree.query(side, k=1)
            if test_distance < distance_threshold:
                return True
        # original code
        return pf.endpoints_map[side[0], side[1]]

    for pathlet in pf.pathlets:
        pathlet_length = calculate_length(pathlet)

        l_side = pathlet[0]
        r_side = pathlet[-1]

        l_is_end = side_is_end(l_side)
        r_is_end = side_is_end(r_side)

        l_index_shift = endpoint_shift if l_is_end else junction_shift
        r_index_shift = endpoint_shift if r_is_end else junction_shift

        l_tree = endpoint_tree if l_is_end else junction_tree
        r_tree = endpoint_tree if r_is_end else junction_tree

        # no node of the required kind exists: the pathlet cannot be attached
        if l_tree is None or r_tree is None:
            continue

        l_distance, l_index = l_tree.query(l_side, k=1)
        r_distance, r_index = r_tree.query(r_side, k=1)

        if l_distance > cutoff_radius or r_distance > cutoff_radius:
            # probably does not happen
            continue

        adjacency_left_index = l_index + l_index_shift
        adjacency_right_index = r_index + r_index_shift

        # undirected graph: fill both triangles
        adjacency[adjacency_left_index, adjacency_right_index] = pathlet_length
        adjacency[adjacency_right_index, adjacency_left_index] = pathlet_length

    self.junction_shift = junction_shift
    self.endpoint_shift = endpoint_shift

    self.data = data
    self.endpoint_tree = endpoint_tree
    self.junction_tree = junction_tree

    self.endpoint_tree_data = endpoint_tree_data
    self.junction_tree_data = junction_tree_data

    self.adjacency = adjacency

    self.every_endpoint = range(self.endpoint_shift, self.junction_shift)
    self.every_junction = range(self.junction_shift, self.junction_shift + len(self.junction_tree_data))

    cleanup_graph_after_creation = True

    if cleanup_graph_after_creation:
        self.cleanup_adjacency()

    self.adjacency = self.adjacency.tocsr()

    self.generate_derived_data()