def kneighbors(self, X, n_neighbors=None):
    """Find the nearest fitted points for each query row of ``X``.

    Parameters
    ----------
    X : array-like
        Query points. A ``numpy.ndarray`` is converted with
        ``expr.from_numpy``; any other value is used as given. The
        brute-force path reads ``X.shape[0]`` and ``X.shape[1]``, so ``X``
        must be two-dimensional there.
    n_neighbors : int, optional
        Number of neighbors to return. When given, it overwrites
        ``self.n_neighbors`` and therefore also applies to later calls.
        When omitted, the current ``self.n_neighbors`` is used.

    Returns
    -------
    dist : array
        Distances from each query row to its selected neighbors, in
        ascending order. On the brute-force path these are Euclidean
        distances (square root of the summed squared differences); on the
        tile path they are whatever ``_knn_mapper`` reports.
    ind : array
        Indices of the selected neighbors, in the same order as ``dist``.
    """
    if n_neighbors is not None:
        self.n_neighbors = n_neighbors
    # Always slice with the effective value: the raw argument may be None,
    # and ``[:, :None]`` would return every fitted point instead of k.
    k = self.n_neighbors

    if isinstance(X, np.ndarray):
        X = expr.from_numpy(X)

    if self.algorithm in ('auto', 'brute'):
        # Broadcast queries (n_query, 1, n_features) against the fitted
        # data (1, n_fit, n_features) to get all pairwise differences.
        X_broadcast = expr.reshape(X, (X.shape[0], 1, X.shape[1]))
        fit_X_broadcast = expr.reshape(
            self.X, (1, self.X.shape[0], self.X.shape[1]))
        distances = expr.sum((X_broadcast - fit_X_broadcast) ** 2, axis=2)

        neigh_ind = expr.argsort(distances, axis=1)
        neigh_ind = neigh_ind[:, :k].optimized().glom()
        # The square root is applied only to the k columns that are kept.
        neigh_dist = expr.sort(distances, axis=1)
        neigh_dist = expr.sqrt(neigh_dist[:, :k]).optimized().glom()
        return neigh_dist, neigh_ind

    results = self.X.foreach_tile(
        mapper_fn=_knn_mapper,
        kw={'X': self.X, 'Q': X,
            'n_neighbors': k,
            'algorithm': self.algorithm})

    # Each tile contributes its own candidate neighbors; gather them in a
    # single pass so distances and indices stay column-aligned, then join
    # them once instead of re-concatenating on every iteration.
    dist_parts = []
    ind_parts = []
    for _, candidates in results.iteritems():
        dist_parts.append(candidates[0])
        ind_parts.append(candidates[1])
    dist = np.concatenate(dist_parts, axis=1)
    ind = np.concatenate(ind_parts, axis=1)

    # Pick the k smallest candidate distances per query row and apply the
    # same column selection to the candidate indices.
    mask = np.argsort(dist, axis=1)[:, :k]
    new_dist = np.array([row[sel] for row, sel in zip(dist, mask)])
    new_ind = np.array([row[sel] for row, sel in zip(ind, mask)])
    return new_dist, new_ind
def test_ndimension(self):
    """Check ``expr.sort`` and ``expr.argsort`` against NumPy on random shapes.

    Runs five cases. Each case draws a dimension count from
    ``np.random.randint(low=2, high=6)`` and per-axis extents from
    ``np.random.randint(low=5, high=11, ...)``, builds an array with
    ``new_ndarray``, and compares the distributed result with the NumPy
    result along every axis.
    """
    for trial in xrange(5):
        ndim = np.random.randint(low=2, high=6)
        extents = np.random.randint(low=5, high=11, size=ndim)
        util.log_info('Test Case #%s: DIM(%s) shape%s', trial + 1, ndim, extents)

        reference = new_ndarray(extents)
        distributed = expr.from_numpy(reference)

        for axis in xrange(ndim):
            # Sorted values must match NumPy along this axis.
            actual_sorted = expr.sort(distributed, axis).glom()
            expected_sorted = np.sort(reference, axis)
            Assert.all_eq(actual_sorted, expected_sorted)

            # The sorting permutation must match as well.
            actual_order = expr.argsort(distributed, axis).glom()
            expected_order = np.argsort(reference, axis)
            Assert.all_eq(actual_order, expected_order)
def benchmark_sort(ctx, timer):
    """Sort a random 10x10x10 array with ``expr.sort`` and print a NumPy check.

    Prints the result of comparing the fetched distributed sort against
    ``np.sort(..., axis=None)`` of the same data. ``ctx`` and ``timer`` are
    accepted but not used in this body.
    """
    source = expr.rand(10, 10, 10).force()
    sorted_expr = expr.sort(source)

    actual = sorted_expr.glom()
    expected = np.sort(source.glom(), axis=None)
    matches = np.all(np.equal(actual, expected))
    # Single-argument parenthesised form prints the same text under Python 2.
    print(matches)
def kneighbors(self, X, n_neighbors=None):
    """Return distances and indices of the nearest fitted points.

    Parameters
    ----------
    X : array-like
        Query points. ``numpy.ndarray`` input is wrapped with
        ``expr.from_numpy``; other values are passed through unchanged.
        The brute-force path reads ``X.shape[0]`` and ``X.shape[1]``, so
        it needs a two-dimensional ``X``.
    n_neighbors : int, optional
        How many neighbors to return. A non-None value is stored on
        ``self.n_neighbors`` and so also affects subsequent calls. If
        omitted, the value already held in ``self.n_neighbors`` applies.

    Returns
    -------
    dist : array
        Per-query distances to the selected neighbors, smallest first.
        The brute-force path computes Euclidean distances; the tile path
        forwards the distances produced by ``_knn_mapper``.
    ind : array
        Indices of the selected neighbors, aligned with ``dist``.
    """
    if n_neighbors is not None:
        self.n_neighbors = n_neighbors
    # Use the stored value for every slice below. Slicing with the raw
    # argument would become ``[:, :None]`` when it is omitted, which
    # keeps all columns rather than the k nearest.
    effective_k = self.n_neighbors

    if isinstance(X, np.ndarray):
        X = expr.from_numpy(X)

    if self.algorithm in ('auto', 'brute'):
        # Shapes (n_query, 1, n_features) and (1, n_fit, n_features)
        # broadcast to every query/fitted-point pair.
        X_broadcast = expr.reshape(X, (X.shape[0], 1, X.shape[1]))
        fit_X_broadcast = expr.reshape(
            self.X, (1, self.X.shape[0], self.X.shape[1]))
        distances = expr.sum((X_broadcast - fit_X_broadcast) ** 2, axis=2)

        neigh_ind = expr.argsort(distances, axis=1)
        neigh_ind = neigh_ind[:, :effective_k].optimized().glom()
        neigh_dist = expr.sort(distances, axis=1)
        # Take the square root after slicing so only k columns are processed.
        neigh_dist = expr.sqrt(
            neigh_dist[:, :effective_k]).optimized().glom()
        return neigh_dist, neigh_ind

    results = self.X.foreach_tile(
        mapper_fn=_knn_mapper,
        kw={
            'X': self.X,
            'Q': X,
            'n_neighbors': effective_k,
            'algorithm': self.algorithm,
        })

    # Collect the per-tile candidates in one pass (keeping distance and
    # index columns aligned) and concatenate once at the end.
    tile_dists = []
    tile_inds = []
    for _, tile_result in results.iteritems():
        tile_dists.append(tile_result[0])
        tile_inds.append(tile_result[1])
    dist = np.concatenate(tile_dists, axis=1)
    ind = np.concatenate(tile_inds, axis=1)

    # Reduce the pooled candidates to the k closest per query row.
    mask = np.argsort(dist, axis=1)[:, :effective_k]
    new_dist = np.array([row[sel] for row, sel in zip(dist, mask)])
    new_ind = np.array([row[sel] for row, sel in zip(ind, mask)])
    return new_dist, new_ind