class scan(): def __init__(self, filepath): #start = time.time() self.name = filepath self.file = File(filepath, mode="r") #self.filesize = getsizeof(self.file)/8 self.scale = self.file.header.scale[0] self.offset = self.file.header.offset[0] self.tree = KDTree( np.vstack([self.file.x, self.file.y, self.file.z]).transpose()) #self.tree.size = getsizeof(self.tree)/8 filename = splitext(basename(filepath))[0].replace("_", "") dateobj = [int(filename[i:i + 2]) for i in range(0, len(filename), 2) ] # year, month,day,hour,min,sec self.time = datetime.datetime(dateobj[0], dateobj[1], dateobj[2], dateobj[3], dateobj[4], dateobj[5], 0) #print("File Size: {}, KDTree Size: {}\n".format(self.filesize,self.treesize)) self.file = None #end = time.time() - start #print("Time Elapsed: {} for {}".format(int(np.rint(end)),basename(self.name))) def NNN(self, point, k): return self.tree.data[self.tree.query(point, k=k)[1]] def radialcluster(self, point, radius): neighbor = self.tree.data[self.tree.query(point, k=1)[1]] points = self.tree.data[self.tree.query_ball_point(neighbor, radius)] return np.array(points)
class Scan(): def __init__(self, filepath, skipinterval=1, buildTree=True): self.filepath = filepath.filepath self.file = File(self.filepath, mode="r") self.scale = self.file.header.scale[0] self.offset = self.file.header.offset[0] if buildTree: self.tree = KDTree( np.vstack([ self.file.x[::skipinterval], self.file.y[::skipinterval], self.file.z[::skipinterval] ]).transpose()) self.treeexis = True self.datetime = filepath.datetime def knear(self, point, k): if not self.treeexis: raise ValueError("Tree Is Not Built") return self.tree.data[self.tree.query(point, k=k)[1]] def radialcluster(self, point, radius): if not self.treeexis: raise ValueError("Tree Is Not Built") neighbor = self.tree.data[self.tree.query(point, k=1)[1]] points = self.tree.data[self.tree.query_ball_point(neighbor, radius)] return np.array(points) def pointSet(self): return np.vstack([ self.file.x[::skipinterval], self.file.y[::skipinterval], self.file.z[::skipinterval] ])
class Neighbors: """ Classifier implementing k-Nearest Neighbor Algorithm. Parameters ---------- data : array-like, shape (n, k) The data points to be indexed. This array is not copied, and so modifying this data will result in bogus results. labels : array An array representing labels for the data (only arrays of integers are supported). k : int default number of neighbors. window_size : float the default window size. Examples -------- >>> samples = [[0.,0.,1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]] >>> labels = [0,0,1,1] >>> neigh = Neighbors(samples, labels=labels) >>> print neigh.predict([[0,0,0]]) [0] """ def __init__(self, data, labels, k=1, window_size=1.): """ Internally uses scipy.spatial.KDTree for most of its algorithms. """ self.kdtree = KDTree(data, leafsize=20) self._k = k self.window_size = window_size self.points = np.ascontiguousarray(data) # needed for saving the state self.labels = np.asarray(labels) self.label_range = [self.labels.min(), self.labels.max()] def __getinitargs__(self): """ Returns the state of the neighboorhood """ return (self.points, self._k, self.window_size) def __setstate__(self, state): pass def __getstate__(self): return {} def kneighbors(self, data, k=None): """ Finds the K-neighbors of a point. Parameters ---------- point : array-like The new point. k : int Number of neighbors to get (default is the value passed to the constructor). Returns ------- dist : array Array representing the lenghts to point. ind : array Array representing the indices of the nearest points in the population matrix. Examples -------- In the following example, we construnct a Neighbors class from an array representing our data set and ask who's the closest point to [1,1,1] >>> import numpy as np >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]] >>> labels = [0, 0, 1] >>> neigh = Neighbors(samples, labels=labels) >>> print neigh.kneighbors([1., 1., 1.]) (0.5, 2) As you can see, it returns [0.5], and [2], which means that the element is at distance 0.5 and is the third element of samples (indexes start at 0). You can also query for multiple points: >>> print neigh.kneighbors([[0., 1., 0.], [1., 0., 1.]]) (array([ 0.5 , 1.11803399]), array([1, 2])) """ if k is None: k = self._k return self.kdtree.query(data, k=k) def parzen(self, point, window_size=None): """ Finds the neighbors of a point in a Parzen window Parameters : - point is a new point - window_size is the size of the window (default is the value passed to the constructor) """ if window_size is None: window_size = self.window_size return self.kdtree.query_ball_point(data, p=1.) def predict(self, data): """ Predict the class labels for the provided data. Parameters ---------- data: matrix An array representing the test point. Returns ------- labels: array List of class labels (one for each data sample). Examples -------- >>> import numpy as np >>> labels = [0,0,1] >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]] >>> neigh = Neighbors(samples, labels=labels) >>> print neigh.predict([.2, .1, .2]) 0 >>> print neigh.predict([[0., -1., 0.], [3., 2., 0.]]) [0 1] """ dist, ind = self.kneighbors(data) labels = self.labels[ind] if self._k == 1: return labels # search most common values along axis 1 of labels # this is much faster than scipy.stats.mode return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=labels)