def kneighbors(self, y, sparse=True):
    '''
    Obtain the k nearest neighbors of the query dataset y.

    Args:
        y: query points, a 2D numpy array whose last dimension equals the
            last dimension of the fitted dataset. ndarray subclasses are
            accepted and handled as plain ndarrays.
        sparse: kept for interface compatibility; this method never reads it.

    Returns:
        The result of ``self.__unsort`` applied to the ``argKmin`` output
        (presumably neighbor indices in the caller's original query order;
        confirm against ``__unsort``).

    Raises:
        ValueError: if no dataset has been fitted, if ``y`` is not a numpy
            array, if ``y`` is not 2D, or if its last dimension differs from
            the fitted dataset's.
    '''
    if self.__x is None:
        raise ValueError('Input dataset not fitted yet! Call .fit() first!')
    # isinstance (rather than an exact type comparison) lets ndarray
    # subclasses through instead of rejecting them outright.
    if not isinstance(y, np.ndarray):
        raise ValueError("Query dataset must be a numpy ndarray")
    # Normalise subclasses to a base ndarray so the helpers below always get
    # the plain array type; this is a no-op for a plain ndarray.
    y = np.asarray(y)
    if len(y.shape) != 2:
        raise ValueError('Query dataset must be a 2D array')
    if self.__x.shape[-1] != y.shape[-1]:
        raise ValueError('Query and dataset must have same dimensions')

    # Assign every query point to a cluster, then derive the per-cluster
    # ranges before the queries are reordered by cluster.
    y_labels = self.__assign(y, self.__c)
    y_ranges, _, _ = cluster_ranges_centroids(y, y_labels)
    y, y_labels = self.__sort_clusters(y, y_labels, store_x=False)

    # Symbolic squared distances between queries (axis 0) and data (axis 1).
    x_LT = LazyTensor(np.expand_dims(self.__x, 0))
    y_LT = LazyTensor(np.expand_dims(y, 1))
    D_ij = ((y_LT - x_LT) ** 2).sum(-1)

    # Restrict the reduction to the cluster pairs selected by self.__keep.
    D_ij.ranges = from_matrix(y_ranges, self.__x_ranges, self.__keep)
    D_ij.backend = 'GPU' if self.__use_gpu else 'CPU'

    nn = D_ij.argKmin(K=self.__k, axis=1)
    return self.__unsort(nn)
def gaussian_kernel(x, y, sigma=0.1):
    """Return the symbolic Gaussian kernel exp(-|x_i - y_j|^2 / (2 sigma^2)).

    Assumes ``x`` is an (M, D) array and ``y`` an (N, D) array (TODO confirm
    with callers); the result is a lazy (M, N) kernel matrix, not a dense
    array.
    """
    rows = LazyTensor(x[:, None, :])  # x indexed along the first symbolic axis
    cols = LazyTensor(y[None, :, :])  # y indexed along the second symbolic axis
    sq_dist = ((rows - cols) ** 2).sum(-1)  # symbolic squared distances
    exponent = -sq_dist / (2 * sigma**2)
    return exponent.exp()
def KMeans(x, K=10, Niter=10, verbose=True):
    """Run Lloyd's K-means iterations on the point cloud ``x``.

    Args:
        x: (N, D) numpy array of samples.
        K: number of clusters; the first ``K`` rows of ``x`` seed the centroids.
        Niter: number of iterations. Must be at least 1, because the labels
            are only assigned inside the loop.
        verbose: if true, print a timing summary.

    Returns:
        ``(cl, c)``: the vector of class labels (one per sample) and the
        array of cluster centroids.
    """
    N, D = x.shape  # Number of samples, dimension of the ambient space

    start = time.time()
    c = np.copy(x[:K, :])  # Simplistic initialization: the first K samples
    n_clusters = c.shape[0]
    x_i = LazyTensor(x[:, None, :])  # (Npoints, 1, D)

    for _ in range(Niter):
        c_j = LazyTensor(c[None, :, :])  # (1, Nclusters, D)
        # (Npoints, Nclusters) symbolic matrix of squared distances
        D_ij = ((x_i - c_j) ** 2).sum(-1)
        cl = D_ij.argmin(axis=1).astype(int).reshape(N)  # Points -> Nearest cluster

        # minlength keeps one slot per centroid even when the highest-indexed
        # clusters received no point; without it the count vector is shorter
        # than the centroid array and the update below fails to broadcast.
        Ncl = np.bincount(cl, minlength=n_clusters).astype(dtype)  # Class weights
        # Empty clusters keep their previous centroid rather than becoming 0/0.
        nonempty = Ncl > 0
        for d in range(D):  # Compute the cluster centroids with np.bincount:
            sums = np.bincount(cl, weights=x[:, d], minlength=n_clusters)
            c[nonempty, d] = sums[nonempty] / Ncl[nonempty]

    end = time.time()

    if verbose:
        print("K-means example with {:,} points in dimension {:,}, K = {:,}:".format(N, D, K))
        print('Timing for {} iterations: {:.5f}s = {} x {:.5f}s\n'.format(
            Niter, end - start, Niter, (end - start) / Niter))

    return cl, c
def Kinv_scipy(x, b, gamma, alpha):
    """Solve (alpha * Id + K) a = b with scipy's conjugate gradient.

    ``K`` is the symbolic kernel exp(-|gamma*x_i - gamma*x_j|^2), built from
    the rows of ``x``. Returns whatever ``cg`` returns, unmodified.
    """
    x_i = LazyTensor(gamma * x[:, None, :])
    y_j = LazyTensor(gamma * x[None, :, :])
    sq_dist = ((x_i - y_j) ** 2).sum(2)
    K_ij = (-sq_dist).exp()

    # Ridge term and kernel are summed as scipy linear operators.
    ridge = aslinearoperator(diags(alpha * np.ones(x.shape[0])))
    A = ridge + aslinearoperator(K_ij)
    A.dtype = np.dtype('float32')
    return cg(A, b)
def laplacian_kernel(x, y, sigma=0.1):
    """Return the symbolic Laplacian kernel exp(-|x_i - y_j| / sigma).

    Assumes ``x`` is an (M, D) array and ``y`` an (N, D) array (TODO confirm
    with callers); the result is a lazy (M, N) kernel matrix, not a dense
    array.
    """
    rows = LazyTensor(x[:, None, :])  # x indexed along the first symbolic axis
    cols = LazyTensor(y[None, :, :])  # y indexed along the second symbolic axis
    sq_dist = ((rows - cols) ** 2).sum(-1)  # symbolic squared distances
    exponent = -sq_dist.sqrt() / sigma
    return exponent.exp()
def fun(x, y, a, b, backend):
    """Evaluate sum_i sum_k ((x * y) * y.real + x + x.real) for either backend.

    Args:
        x, y: broadcastable 3D arrays (complex values are supported through
            ``.real``). They are wrapped in ``LazyTensor`` when ``backend``
            is ``"keops"`` and used as-is otherwise.
        a, b: unused; kept so the signature stays unchanged.
        backend: ``"keops"`` for the symbolic path, anything else for the
            plain array path.

    Returns:
        The expression summed over axis 2, then over axis 0.
    """
    if backend == "keops":
        x = LazyTensor(x)
        y = LazyTensor(y)
    # The same expression is valid for LazyTensors and for numpy arrays.
    Kxy = ((x * y) * y.real + x + x.real).sum(axis=2)
    return Kxy.sum(axis=0)
def Kinv_scipy(x, b, gamma, alpha, **kwargs):
    """Solve (alpha * Id + K) a = b with scipy's conjugate gradient.

    ``K`` is the symbolic kernel exp(-gamma * |x_i - x_j|^2), obtained by
    scaling the points by sqrt(gamma). Extra keyword arguments are accepted
    and ignored. Returns whatever ``cg`` returns, unmodified.
    """
    scale = np.sqrt(gamma)
    x_i = LazyTensor(scale * x[:, None, :])
    y_j = LazyTensor(scale * x[None, :, :])
    sq_dist = ((x_i - y_j) ** 2).sum(2)
    K_ij = (-sq_dist).exp()

    # Ridge term and kernel are summed as scipy linear operators.
    ridge = aslinearoperator(diags(alpha * np.ones(x.shape[0])))
    A = ridge + aslinearoperator(K_ij)
    A.dtype = np.dtype("float32")
    return cg(A, b)
def __k_argmin(self, x, y, k=1):
    """Return, for each row of ``x``, the index/indices of its nearest rows of ``y``.

    Uses squared Euclidean distance. With ``k == 1`` the flattened ``argmin``
    result is returned; otherwise the ``argKmin`` result for ``K=k``.
    """
    queries = LazyTensor(np.expand_dims(x, 1))
    references = LazyTensor(np.expand_dims(y, 0))
    sq_dist = ((queries - references) ** 2).sum(-1)
    sq_dist.backend = 'GPU' if self.__use_gpu else 'CPU'

    if k != 1:
        return sq_dist.argKmin(K=k, dim=1)
    return sq_dist.argmin(dim=1).flatten()
def kmeans(x, K=10, Niter=15, metric="euclidean", device="CPU"):
    """Run K-means on ``x`` using the distance selected by ``metric``.

    Args:
        x: (N, D) numpy array of samples.
        K: number of clusters; the first ``K`` rows of ``x`` seed the centroids.
        Niter: number of iterations. Must be at least 1, because the labels
            are only assigned inside the loop.
        metric: name passed to ``numpytools.distance_function``.
        device: value assigned to the LazyTensor ``backend`` attribute.

    Returns:
        ``(cl, c)``: per-sample cluster labels and the centroid array.
    """
    distance = numpytools.distance_function(metric)
    N, D = x.shape
    c = np.copy(x[:K, :])
    n_clusters = c.shape[0]
    x_i = LazyTensor(x[:, None, :])

    for _ in range(Niter):
        c_j = LazyTensor(c[None, :, :])
        D_ij = distance(x_i, c_j)
        D_ij.backend = device
        cl = D_ij.argmin(axis=1).astype(int).reshape(N)

        # minlength keeps one slot per centroid even when the highest-indexed
        # clusters received no point; without it the count vector is shorter
        # than the centroid array and the update below fails to broadcast.
        Ncl = np.bincount(cl, minlength=n_clusters).astype(dtype="float32")
        # Empty clusters keep their previous centroid rather than becoming 0/0.
        nonempty = Ncl > 0
        for d in range(D):
            sums = np.bincount(cl, weights=x[:, d], minlength=n_clusters)
            c[nonempty, d] = sums[nonempty] / Ncl[nonempty]

    return cl, c
def __KMeans(self, x, K=10, Niter=15):
    """Run K-means with squared Euclidean distance on ``x``.

    Args:
        x: (N, D) numpy array of samples.
        K: number of clusters; the first ``K`` rows of ``x`` seed the centroids.
        Niter: number of iterations. Must be at least 1, because the labels
            are only assigned inside the loop.

    Returns:
        ``(cl, c)``: per-sample cluster labels and the centroid array.
    """
    N, D = x.shape
    c = np.copy(x[:K, :])
    n_clusters = c.shape[0]
    x_i = LazyTensor(x[:, None, :])
    backend = 'GPU' if self.__use_gpu else 'CPU'

    for _ in range(Niter):
        c_j = LazyTensor(c[None, :, :])
        D_ij = ((x_i - c_j) ** 2).sum(-1)
        D_ij.backend = backend
        cl = D_ij.argmin(axis=1).astype(int).reshape(N)

        # minlength keeps one slot per centroid even when the highest-indexed
        # clusters received no point; without it the count vector is shorter
        # than the centroid array and the update below fails to broadcast.
        Ncl = np.bincount(cl, minlength=n_clusters).astype(dtype="float32")
        # Empty clusters keep their previous centroid rather than becoming 0/0.
        nonempty = Ncl > 0
        for d in range(D):
            sums = np.bincount(cl, weights=x[:, d], minlength=n_clusters)
            c[nonempty, d] = sums[nonempty] / Ncl[nonempty]

    return cl, c
def test_LazyTensor_sum(self):
    ############################################################
    """Check that a batched LazyTensor sum reduction matches plain numpy."""
    from pykeops.numpy import LazyTensor

    def run(use_keops):
        # One reduced array per dtype in self.type_to_test.
        outputs = []
        for t in self.type_to_test:
            x, lin, y, s = (
                self.X.astype(t),
                self.L.astype(t),
                self.Y.astype(t),
                self.S.astype(t),
            )
            x_i = x[:, :, :, None, :]
            l_i = lin[:, :, :, None, :]
            y_j = y[:, :, None, :, :]
            s_p = s[:, :, None, None, :]

            if use_keops:
                x_i, l_i, y_j, s_p = map(LazyTensor, (x_i, l_i, y_j, s_p))

            D_ij = ((l_i + x_i * y_j) ** 2 + s_p).sum(-1)

            if use_keops:
                K_ij = 1 / (1 + D_ij).exp()
                # The KeOps result carries a trailing singleton axis.
                a_i = K_ij.sum(self.nbatchdims + 1).squeeze(-1)
            else:
                K_ij = 1 / np.exp(1 + D_ij)
                a_i = K_ij.sum(self.nbatchdims + 1)

            outputs.append(a_i)
        return outputs

    keops_results = run(True)
    numpy_results = run(False)

    for res_keops, res_numpy in zip(keops_results, numpy_results):
        self.assertTrue(res_keops.shape == res_numpy.shape)
        self.assertTrue(np.allclose(res_keops, res_numpy, atol=1e-3))
def test_basic_op():
    """Smoke-test a basic KeOps reduction on the GPU backend and print it."""
    import pykeops

    n_rows, n_cols = 1000, 2000
    pts_x = np.random.rand(n_rows, 2)
    pts_y = np.random.rand(n_cols, 2)

    from pykeops.numpy import LazyTensor

    # pykeops.clean_pykeops()
    lazy_x = LazyTensor(pts_x[:, None, :])  # (M, 1, 2) LazyTensor wrapping the first array
    lazy_y = LazyTensor(pts_y[None, :, :])  # (1, N, 2) LazyTensor wrapping the second array

    # Symbolic squared coordinate differences (no reduction over the last axis)
    sq_diff = (lazy_x - lazy_y) ** 2

    reduced = sq_diff.sum_reduction(axis=0, backend="GPU")
    print(reduced)
def kmeans(x, distance=None, K=10, Niter=15, device="CPU", approx=False, n=0):
    """Run K-means on ``x`` with a pluggable distance function.

    Args:
        x: (N, D) numpy array of samples.
        distance: callable taking two LazyTensors and returning the symbolic
            distance matrix; defaults to
            ``numpytools.distance_function("euclidean")``.
        K: number of clusters; the first ``K`` rows of ``x`` seed the centroids.
        Niter: number of iterations. Must be at least 1, because the labels
            are only assigned inside the loop.
        device: value assigned to the LazyTensor ``backend`` attribute.
        approx: must be falsy; the approximate mode is not available here.
        n: unused; kept so the signature stays unchanged.

    Returns:
        ``(cl, c)``: per-sample cluster labels and the centroid array.

    Raises:
        ValueError: if ``approx`` is truthy.
    """
    if distance is None:
        distance = numpytools.distance_function("euclidean")
    if approx:
        raise ValueError("Approx not supported on numpy version")
    from pykeops.numpy import LazyTensor

    N, D = x.shape
    c = np.copy(x[:K, :])
    n_clusters = c.shape[0]
    x_i = LazyTensor(x[:, None, :])

    for _ in range(Niter):
        c_j = LazyTensor(c[None, :, :])
        D_ij = distance(x_i, c_j)
        D_ij.backend = device
        cl = D_ij.argmin(axis=1).astype(int).reshape(N)

        # minlength keeps one slot per centroid even when the highest-indexed
        # clusters received no point; without it the count vector is shorter
        # than the centroid array and the update below fails to broadcast.
        Ncl = np.bincount(cl, minlength=n_clusters).astype(dtype="float32")
        # Empty clusters keep their previous centroid rather than becoming 0/0.
        nonempty = Ncl > 0
        for d in range(D):
            sums = np.bincount(cl, weights=x[:, d], minlength=n_clusters)
            c[nonempty, d] = sums[nonempty] / Ncl[nonempty]

    return cl, c
def _upgrade_kernel_input_to_keops_tensor(struct_data_instance):
    """Replace every instance attribute, in place, by a float32 LazyTensor.

    Each value in the instance ``__dict__`` must provide ``astype``.
    Returns ``None``.
    """
    from pykeops.numpy import LazyTensor

    attributes = struct_data_instance.__dict__
    # Only values of existing keys are rebound, so the dict size stays fixed
    # while it is being iterated.
    for name in attributes:
        attributes[name] = LazyTensor(attributes[name].astype('float32'))
# Thin wrapper that forwards ``x`` to the name ``LazyTensor`` resolved at call
# time and returns the result.
# NOTE(review): presumably this is a method (e.g. a static method) of a class
# whose module imports the real ``LazyTensor``, so the inner name resolves to
# the module-level class. If it were defined at module scope instead, it would
# call itself recursively. Confirm against the enclosing scope.
def LazyTensor(x): return LazyTensor(x)
# Size of every (x-cluster, y-cluster) block of the kernel matrix:
areas = np.outer(
    x_ranges[:, 1] - x_ranges[:, 0],
    y_ranges[:, 1] - y_ranges[:, 0],
)
total_area = areas.sum()  # should be equal to N*M
sparse_area = areas[keep].sum()

print(
    "We keep {:.2e}/{:.2e} = {:2d}% of the original kernel matrix.".format(
        sparse_area, total_area, int(100 * sparse_area / total_area)
    )
)
print("")

####################################################################
# Benchmark a block-sparse Gaussian convolution
# -------------------------------------------------
#
# Define a Gaussian kernel matrix from 2d point clouds:

x_ = x / sigma
y_ = y / sigma
x_i = LazyTensor(x_[:, None, :])
y_j = LazyTensor(y_[None, :, :])
D_ij = ((x_i - y_j) ** 2).sum(dim=2)  # Symbolic (M,N,1) matrix of squared distances
K = (-D_ij / 2).exp()  # Symbolic (M,N,1) Gaussian kernel matrix

#####################################################################
# And create a random signal supported by the points :math:`y_j`:

b = np.random.randn(N, 1).astype(dtype)

##############################################################################
# Compare the performances of our **block-sparse** code
# with those of a **dense** implementation, on both CPU and GPU backends:
#
# .. note::
#   The standard KeOps routine are already *very* efficient:
def brute_force(self, x, y, k=5):
    """Exhaustive k-nearest-neighbor search with squared Euclidean distance.

    For every row of ``y`` (queries), returns the ``argKmin`` result with
    ``K=k`` over the rows of ``x`` (dataset).
    """
    dataset = LazyTensor(np.expand_dims(x, 0))
    queries = LazyTensor(np.expand_dims(y, 1))
    sq_dist = ((queries - dataset) ** 2).sum(-1)
    return sq_dist.argKmin(K=k, axis=1)
# Perform the K-NN classification, with a fancy display:
#

plt.figure(figsize=(12, 8))

# First panel: the labelled training samples.
plt.subplot(2, 3, 1)
plt.scatter(x[:, 0], x[:, 1], c=cl, s=2)
plt.imshow(np.ones((2, 2)), extent=(0, 1, 0, 1), alpha=0)
plt.axis("off")
plt.axis([0, 1, 0, 1])
plt.title("{:,} data points,\n{:,} grid points".format(N, M * M))

# Remaining panels: one K-NN classifier per value of K.
for i, K in enumerate((1, 3, 10, 20, 50)):
    start = time.time()  # Benchmark:

    G_i = LazyTensor(g[:, None, :])  # (M**2, 1, 2)
    X_j = LazyTensor(x[None, :, :])  # (1, N, 2)
    # (M**2, N) symbolic matrix of squared distances
    D_ij = ((G_i - X_j) ** 2).sum(-1)

    indKNN = D_ij.argKmin(K, dim=1)  # Grid <-> Samples, (M**2, K) integer tensor
    clg = cl[indKNN].mean(axis=1) > 0.5  # Classify the Grid points

    end = time.time()

    plt.subplot(2, 3, i + 2)  # Fancy display:
    clg = clg.reshape((M, M))
    plt.imshow(clg, extent=(0, 1, 0, 1), origin="lower")
    plt.axis("off")
    plt.axis([0, 1, 0, 1])
    plt.tight_layout()
# Define our dataset:
#

M = 5000  # Number of "i" points
N = 4000  # Number of "j" points
D = 3  # Dimension of the ambient space
Dv = 2  # Dimension of the vectors

x = 2 * np.random.randn(M, D)
y = 2 * np.random.randn(N, D)
b = np.random.rand(N, Dv)

# KeOps implementation with the helper
WarmUpGpu()
start = time.time()
c = kf.sum((Vi(x) - Vj(y)) ** 2, axis=2)
c = kf.sumsoftmaxweight(c, Vj(b), axis=1)
print("Timing (KeOps implementation): ", round(time.time() - start, 5), "s")

# compare with direct implementation
start = time.time()
cc = ((x[:, None, :] - y[None, :, :]) ** 2).sum(axis=2)
# Subtract the row-wise max to prevent numeric overflows
cc -= cc.max(axis=1, keepdims=True)
cc = (np.exp(cc) @ b) / np.exp(cc).sum(axis=1, keepdims=True)
print("Timing (Numpy implementation): ", round(time.time() - start, 5), "s")

relative_error = np.linalg.norm(c - cc) / np.linalg.norm(c)
print("Relative error : ", relative_error.item())

# Plot the results next to each other:
for i in range(Dv):
    plt.subplot(Dv, 1, i + 1)
# %%
import numpy as np

M, N = 1000, 2000
x = np.random.rand(M, 2)
y = np.random.rand(N, 2)

from pykeops.numpy import LazyTensor
import pykeops

pykeops.verbose = True

# (M, 1, 2) KeOps LazyTensor, wrapped around the numpy array x
x_i = LazyTensor(x[:, None, :])
# (1, N, 2) KeOps LazyTensor, wrapped around the numpy array y
y_j = LazyTensor(y[None, :, :])

# **Symbolic** squared coordinate differences (not yet summed over the last axis)
D_ij = (x_i - y_j) ** 2

foo = D_ij.sum_reduction(axis=0, backend="GPU")
print(foo)

pykeops.test_numpy_bindings()