Beispiel #1
0
    def _Gauss_block_sparse_pre(self, x:np.array, y:np.array, K_ij:LazyTensor,
                               sigma:float = 1., eps:float = 0.05):
        '''
        Attach a block-sparsity pattern to a symbolic Gaussian kernel.

        Both point clouds are binned on a grid, one centroid is computed
        per bin, and only the pairs of bins whose centroids are closer
        than 4 * sigma are kept for the reduction.

        Args:
            x[np.array], y[np.array] = arrays giving rise to Gaussian kernel K(x,y)
            K_ij[LazyTensor_n] = symbolic representation of K(x,y)
            sigma[float] = kernel width; bins further apart than 4 * sigma
                           (centroid to centroid) are masked out
            eps[float] = size for square bins

        Returns:
            K_ij[LazyTensor_n] = the same object, with its ``ranges``
                                attribute set to the sparse ranges
        '''
        # One integer class label per bin of size eps.
        labels_x = grid_cluster(x, eps)
        labels_y = grid_cluster(y, eps)

        # One index range and one centroid per class (third output unused).
        ranges_x, centers_x, _ = cluster_ranges_centroids(x, labels_x)
        ranges_y, centers_y, _ = cluster_ranges_centroids(y, labels_y)

        # Sort the points by label.
        # NOTE(review): the sorted arrays only rebind the local names and are
        # not returned -- confirm callers do not rely on x, y being reordered.
        x, labels_x = sort_clusters(x, labels_x)
        y, labels_y = sort_clusters(y, labels_y)

        # Coarse Boolean mask: squared centroid-to-centroid distances
        # compared against the squared cut-off radius.
        offsets = centers_x[:, None, :] - centers_y[None, :, :]
        sq_dists = np.sum(offsets ** 2, 2)
        cutoff_sq = (4 * sigma) ** 2
        near = sq_dists < cutoff_sq

        # Boolean mask -> integer range tensors, stored on the kernel as its
        # block-sparsity pattern.
        K_ij.ranges = from_matrix(ranges_x, ranges_y, near)

        return K_ij
Beispiel #2
0
    def _Gauss_block_sparse_pre(self, x: np.ndarray, y: np.ndarray,
                                K_ij: LazyTensor):
        '''
        Helper function to preprocess data for block-sparse reduction
        of the kernel selected by ``self.kernel``.

        Points are grouped into clusters (grid bins when either input has
        fewer than 4 columns, ``self._KMeans`` otherwise) and only pairs of
        clusters whose centroids are close enough are kept.

        Args:
            x[np.ndarray], y[np.ndarray] = arrays giving rise to kernel K(x,y)
            K_ij[LazyTensor_n] = symbolic representation of K(x,y)

        Attributes read from self:
            kernel[str] = 'rbf' or 'exp'
            eps[float] = size for square bins (grid clustering only)
            mask_radius[float] = cut-off; its square is the mask threshold

        Returns:
            K_ij[LazyTensor_n] = symbolic representation of K(x,y) with
                                set sparse ranges

        Raises:
            ValueError: if ``self.kernel`` is neither 'rbf' nor 'exp'.
        '''
        # Fail fast, before any clustering work: previously an unsupported
        # kernel only surfaced later as an UnboundLocalError on ``D``.
        if self.kernel not in ('rbf', 'exp'):
            raise ValueError(
                "Unsupported kernel {!r}; expected 'rbf' or 'exp'.".format(
                    self.kernel))

        if x.shape[1] < 4 or y.shape[1] < 4:
            # labels for low dimensions
            x_labels = grid_cluster(x, self.eps)
            y_labels = grid_cluster(y, self.eps)
            # range and centroid per class
            x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
            y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
        else:
            # labels for higher dimensions
            x_labels, x_centroids = self._KMeans(x)
            y_labels, y_centroids = self._KMeans(y)
            # compute ranges
            x_ranges = cluster_ranges(x_labels)
            y_ranges = cluster_ranges(y_labels)

        # sort points
        # NOTE(review): the sorted arrays only rebind the local names and are
        # not returned -- confirm callers do not rely on x, y being reordered.
        x, x_labels = sort_clusters(x, x_labels)
        y, y_labels = sort_clusters(y, y_labels)

        # Compute a coarse Boolean mask from centroid-to-centroid distances.
        sq_dist = np.sum(
            (x_centroids[:, None, :] - y_centroids[None, :, :])**2, 2)
        if self.kernel == 'rbf':
            D = sq_dist
        else:
            # NOTE(review): for 'exp' the (unsquared) distance is compared
            # against mask_radius**2 below -- confirm this is intended.
            D = np.sqrt(sq_dist)
        keep = D < (self.mask_radius)**2

        # mask -> set of integer tensors
        ranges_ij = from_matrix(x_ranges, y_ranges, keep)
        K_ij.ranges = ranges_ij  # block-sparsity pattern

        return K_ij
# into groups which should neither be too **small** (performances on clusters
# with less than ~200 points each are suboptimal)
# nor too **many** (the :func:`from_matrix() <pykeops.numpy.cluster.from_matrix>`
# pre-processor can become a bottleneck when working with >2,000 clusters
# per point cloud).
#
# In this tutorial, we use the :func:`grid_cluster() <pykeops.numpy.cluster.grid_cluster>`
# routine which simply groups points into **cubic bins** of arbitrary size:

from pykeops.numpy.cluster import grid_cluster

eps = .05  # Edge length of the cubic bins used by grid_cluster

# NOTE(review): `Start` (capitalised) is not read anywhere in this excerpt;
# presumably it anchors a total-runtime measurement further down -- confirm.
Start = time.time()
start = time.time()  # Timestamp for the clustering step only
x_labels = grid_cluster(x, eps)  # integer class labels for x
y_labels = grid_cluster(y, eps)  # integer class labels for y
end = time.time()
# Report the wall-clock time spent in the two grid_cluster calls.
print("Perform clustering       : {:.4f}s".format(end - start))

##########################################################################
# Once (integer) cluster labels have been computed,
# we can compute the **centroids** and **memory footprint** of each class:

from pykeops.numpy.cluster import cluster_ranges_centroids

# Compute one range and centroid per class, timing the two calls.
# `start` and `end` are re-used from the previous step; the third value
# returned by cluster_ranges_centroids is discarded.
start = time.time()
x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
end = time.time()
Beispiel #4
0
# **skip computations** between pairs of points that are far away from each other.
#
# As explained in :doc:`the documentation <../../python/sparsity>`,
# fast GPU routines rely heavily on **memory contiguity**:
# before going any further, we must
# **sort our input dataset** to make sure that neighboring points are stored
# next to each other on the device memory. As detailed in the
# :doc:`KeOps+NumPy tutorial on block-sparse reductions <../../_auto_examples/numpy/plot_grid_cluster_numpy>`,
# a simple way of doing so is to write:

# Import the KeOps helper routines for block-sparse reductions:
from pykeops.numpy.cluster import grid_cluster, cluster_ranges_centroids, sort_clusters, from_matrix

# Put our points in cubic bins of edge length eps and compute the
# corresponding vector of class labels:
eps = .05
x_labels = grid_cluster(x, eps)
# Compute the memory footprint and centroid of each of those non-empty "cubic" clusters
# (the third returned value is not needed here and is discarded):
x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
# Sort our dataset according to the vector of labels; from here on, `x` and
# `x_labels` refer to the sorted versions:
x, x_labels = sort_clusters(x, x_labels)

#############################################################################
#
# .. note::
#   In higher-dimensional settings, the simplistic
#   :func:`grid_cluster <pykeops.numpy.cluster.grid_cluster>`
#   scheme could be replaced by a more versatile routine such as
#   our :doc:`KeOps+NumPy K-means implementation <../kmeans/plot_kmeans_numpy>`.
#
# Points are now roughly sorted
# according to their locations, with each cluster corresponding to