Example #1
    def _Gauss_block_sparse_pre(self, x: np.array, y: np.array, K_ij: LazyTensor,
                                sigma: float = 1., eps: float = 0.05):
        '''
        Helper function to preprocess data for the block-sparse reduction
        of the Gaussian kernel.

        Args:
            x[np.array], y[np.array] = arrays giving rise to the Gaussian kernel K(x,y)
            K_ij[LazyTensor_n] = symbolic representation of K(x,y)
            sigma[float] = characteristic length of the Gaussian kernel
            eps[float] = side length of the square bins

        Returns:
            K_ij[LazyTensor_n] = symbolic representation of K(x,y) with
                                 the block-sparse ranges set
        '''

        # class labels
        x_labels = grid_cluster(x, eps) 
        y_labels = grid_cluster(y, eps) 
        # compute one range and centroid per class
        x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
        y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
        # sort points
        x, x_labels = sort_clusters(x, x_labels)
        y, y_labels = sort_clusters(y, y_labels) 
        # Compute a coarse Boolean mask:
        D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2)
        keep = D < (4 * sigma) ** 2  # keep cluster pairs whose centroids lie within 4 * sigma
        # mask -> set of integer tensors
        ranges_ij = from_matrix(x_ranges, y_ranges, keep)
        K_ij.ranges = ranges_ij  # block-sparsity pattern

        return K_ij
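
Since the helper above relies on instance state (self), here is a minimal standalone sketch of the same preprocessing applied to a symbolic Gaussian kernel, followed by a block-sparse reduction; the data, sigma and eps values below are illustrative and not taken from the original example:

import numpy as np
from pykeops.numpy import LazyTensor
from pykeops.numpy.cluster import (grid_cluster, cluster_ranges_centroids,
                                   sort_clusters, from_matrix)

sigma, eps = 0.05, 0.05
x = np.random.rand(10000, 2).astype(np.float32)
y = np.random.rand(10000, 2).astype(np.float32)

# Cluster both point clouds on a grid of side eps, then sort them by label:
x_labels, y_labels = grid_cluster(x, eps), grid_cluster(y, eps)
x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
x, x_labels = sort_clusters(x, x_labels)
y, y_labels = sort_clusters(y, y_labels)

# Coarse cluster-to-cluster mask, turned into block-sparse ranges:
D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2)
ranges_ij = from_matrix(x_ranges, y_ranges, D < (4 * sigma) ** 2)

# Symbolic Gaussian kernel on the *sorted* points, restricted to nearby blocks:
x_i, y_j = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :])
K_ij = (-((x_i - y_j) ** 2).sum(-1) / (2 * sigma ** 2)).exp()
K_ij.ranges = ranges_ij
a_i = K_ij.sum(axis=1)  # (10000, 1) row sums of the block-sparse kernel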
Example #2
  def kneighbors(self, y, sparse=True):
    '''
    Obtain the k nearest neighbors of the query dataset y.
    '''
    # Input validation:
    if self.__x is None:
      raise ValueError('Input dataset not fitted yet! Call .fit() first!')
    if not isinstance(y, np.ndarray):
      raise ValueError('Query dataset must be a numpy ndarray')
    if y.ndim != 2:
      raise ValueError('Query dataset must be a 2D array')
    if self.__x.shape[-1] != y.shape[-1]:
      raise ValueError('Query and input datasets must have the same dimension')

    # Assign each query point to a cluster and compute the per-cluster ranges:
    y_labels = self.__assign(y, self.__c)
    y_ranges, _, _ = cluster_ranges_centroids(y, y_labels)

    # Sort the queries so that each cluster is contiguous in memory:
    y, y_labels = self.__sort_clusters(y, y_labels, store_x=False)

    # Symbolic matrix of squared distances, queries on axis 0 and data on axis 1:
    x_LT = LazyTensor(np.expand_dims(self.__x, 0))
    y_LT = LazyTensor(np.expand_dims(y, 1))
    D_ij = ((y_LT - x_LT) ** 2).sum(-1)

    # Restrict the reduction to pairs of nearby clusters:
    ranges_ij = from_matrix(y_ranges, self.__x_ranges, self.__keep)
    D_ij.ranges = ranges_ij
    D_ij.backend = 'GPU' if self.__use_gpu else 'CPU'

    # Block-sparse k-nearest-neighbor search, mapped back to the original order:
    nn = D_ij.argKmin(K=self.__k, axis=1)
    return self.__unsort(nn)
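
The class-level bookkeeping above (__assign, __sort_clusters, __unsort, the stored __x_ranges and __keep mask) is not shown here. A minimal self-contained sketch of the underlying block-sparse argKmin pattern might look as follows; the data, eps, radius and k values are illustrative, and the radius must be chosen large enough for the retained blocks to actually contain the true nearest neighbors:

import numpy as np
from pykeops.numpy import LazyTensor
from pykeops.numpy.cluster import (grid_cluster, cluster_ranges_centroids,
                                   sort_clusters, from_matrix)

eps, radius, k = 0.05, 0.2, 10
x = np.random.rand(10000, 3).astype(np.float32)   # reference points
y = np.random.rand(1000, 3).astype(np.float32)    # query points

# Same clustering/sorting as above, for both the dataset and the queries:
x_lab, y_lab = grid_cluster(x, eps), grid_cluster(y, eps)
x_ranges, x_c, _ = cluster_ranges_centroids(x, x_lab)
y_ranges, y_c, _ = cluster_ranges_centroids(y, y_lab)
x, x_lab = sort_clusters(x, x_lab)
y, y_lab = sort_clusters(y, y_lab)

# Keep (query cluster, data cluster) pairs whose centroids are close enough:
keep = np.sum((y_c[:, None, :] - x_c[None, :, :]) ** 2, 2) < radius ** 2

# Symbolic squared distances with queries on axis 0 and data on axis 1:
D_ij = ((LazyTensor(y[:, None, :]) - LazyTensor(x[None, :, :])) ** 2).sum(-1)
D_ij.ranges = from_matrix(y_ranges, x_ranges, keep)
nn = D_ij.argKmin(K=k, axis=1)  # (1000, k) indices into the *sorted* x array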
    def _Gauss_block_sparse_pre(self, x: np.array, y: np.array,
                                K_ij: LazyTensor):
        '''
        Helper function to preprocess data for the block-sparse reduction
        of the Gaussian kernel. Relies on the instance attributes self.eps
        (bin size), self.kernel and self.mask_radius.

        Args:
            x[np.array], y[np.array] = arrays giving rise to the Gaussian kernel K(x,y)
            K_ij[LazyTensor_n] = symbolic representation of K(x,y)

        Returns:
            K_ij[LazyTensor_n] = symbolic representation of K(x,y) with
                                 the block-sparse ranges set
        '''
        # labels for low dimensions
        if x.shape[1] < 4 or y.shape[1] < 4:
            x_labels = grid_cluster(x, self.eps)
            y_labels = grid_cluster(y, self.eps)
            # range and centroid per class
            x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
            y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
        else:
            # labels for higher dimensions
            x_labels, x_centroids = self._KMeans(x)
            y_labels, y_centroids = self._KMeans(y)
            # compute ranges
            x_ranges = cluster_ranges(x_labels)
            y_ranges = cluster_ranges(y_labels)

        # sort points
        x, x_labels = sort_clusters(x, x_labels)
        y, y_labels = sort_clusters(y, y_labels)
        # Compute a coarse Boolean mask:
        if self.kernel == 'rbf':
            # squared Euclidean distances between centroids
            D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2)
        elif self.kernel == 'exp':
            # Euclidean distances between centroids
            D = np.sqrt(np.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2))
        keep = D < (self.mask_radius) ** 2
        # mask -> set of integer tensors
        ranges_ij = from_matrix(x_ranges, y_ranges, keep)
        K_ij.ranges = ranges_ij  # block-sparsity pattern

        return K_ij
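
The private self._KMeans helper used for higher-dimensional data is not shown in this snippet. As one possible implementation, a minimal LazyTensor-based K-means returning labels and centroids in the format expected above might look like this; the function name, k and n_iter are illustrative:

import numpy as np
from pykeops.numpy import LazyTensor

def kmeans_labels_centroids(x, k=50, n_iter=10):
    # Lloyd iterations, with the assignment step done by a KeOps argmin reduction.
    N, D = x.shape
    c = np.copy(x[:k, :])                      # initialize centroids on the first k points
    x_i = LazyTensor(x[:, None, :])            # (N, 1, D) symbolic points
    for _ in range(n_iter):
        c_j = LazyTensor(c[None, :, :])        # (1, k, D) symbolic centroids
        D_ij = ((x_i - c_j) ** 2).sum(-1)      # (N, k) squared distances
        labels = D_ij.argmin(axis=1).ravel()   # nearest centroid per point
        # Update step: average of the points assigned to each centroid.
        Ncl = np.bincount(labels, minlength=k).astype(x.dtype)
        for d in range(D):
            c[:, d] = np.bincount(labels, weights=x[:, d], minlength=k)
        c /= np.maximum(Ncl, 1)[:, None]       # avoid division by zero for empty clusters
    return labels, c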
sigma = .05  # Characteristic length of interaction
start = time.time()

# Compute a coarse Boolean mask:
D = np.sum((x_centroids[:, None, :] - y_centroids[None, :, :])**2, 2)
keep = D < (4 * sigma)**2

##############################################################################
# To turn this mask into a set of integer Tensors which
# is more palatable to KeOps's low-level CUDA API,
# we then use the :func:`from_matrix <pykeops.numpy.cluster.from_matrix>`
# routine...

from pykeops.numpy.cluster import from_matrix

ranges_ij = from_matrix(x_ranges, y_ranges, keep)

end = time.time()
print("Process the ranges       : {:.4f}s".format(end - start))

End = time.time()
t_cluster = End - Start
print("Total time (synchronized): {:.4f}s".format(End - Start))
print("")

###############################################################################
# And we're done: here is the **ranges** argument that can
# be fed to the KeOps reduction routines!
# For large point clouds, we can expect a speed-up that is directly
# proportional to the ratio of mass between our **fine binary mask**
# (encoded in **ranges_ij**) and the full, N-by-M kernel matrix:
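
###############################################################################
# As an illustrative aside (not part of the original tutorial), we can measure
# the fraction of the full N-by-M kernel matrix that the mask retains, and feed
# **ranges_ij** to a block-sparse Gaussian kernel product. This sketch assumes
# that **x** and **y** have already been sorted by cluster label, as in the
# preprocessing above; the signal **b** is made up for the example:

x_sizes = (x_ranges[:, 1] - x_ranges[:, 0]).astype(float)
y_sizes = (y_ranges[:, 1] - y_ranges[:, 0]).astype(float)
sparsity = (keep * x_sizes[:, None] * y_sizes[None, :]).sum() / (x_sizes.sum() * y_sizes.sum())
print("Retained fraction of the kernel matrix: {:.2%}".format(sparsity))

from pykeops.numpy import LazyTensor

x_i = LazyTensor(x[:, None, :])  # (N, 1, D) symbolic samples
y_j = LazyTensor(y[None, :, :])  # (1, M, D) symbolic samples
K_ij = (-((x_i - y_j) ** 2).sum(-1) / (2 * sigma ** 2)).exp()  # Gaussian kernel
K_ij.ranges = ranges_ij  # block-sparsity pattern computed above

b = np.random.randn(y.shape[0], 1).astype(x.dtype)  # illustrative signal on the y points
a = K_ij @ b  # block-sparse kernel matrix-vector product, shape (N, 1)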