Code Example #1
File: sinkhorn_samples.py  Project: jtpils/geomloss
def clusterize(α, x, scale=None, labels=None):
    """
    Performs a simple 'voxelgrid' clustering on the input measure,
    putting points into cubic bins of size 'scale' = σ_c.
    The weights are summed, and the centroid position is that of the bin's center of mass.
    Most importantly, the "fine" lists of weights and points are *sorted*
    so that clusters are *contiguous in memory*: this allows us to perform
    kernel truncation efficiently on the GPU.

    If 
        [α_c, α], [x_c, x], [x_ranges] = clusterize(α, x, σ_c),
    then
        α_c[k], x_c[k] correspond to
        α[x_ranges[k,0]:x_ranges[k,1]], x[x_ranges[k,0]:x_ranges[k,1],:]
    """
    if labels is None and scale is None:  # No clustering, single-scale Sinkhorn on the way...
        return [α], [x], []

    else:  # As of today, only two-scale Sinkhorn is implemented:
        # Compute simple (voxel-like) class labels:
        x_lab = grid_cluster(x, scale) if labels is None else labels
        # Compute centroids and weights:
        ranges_x, x_c, α_c = cluster_ranges_centroids(x, x_lab, weights=α)
        # Make clusters contiguous in memory:
        (α, x), x_labels = sort_clusters((α, x), x_lab)

        return [α_c, α], [x_c, x], [ranges_x]
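
# A minimal usage sketch (the point cloud and the cluster scale σ_c below
# are hypothetical, not taken from geomloss):
import torch

N, D = 10000, 3          # hypothetical point cloud size and dimension
x = torch.randn(N, D)    # point positions
α = torch.ones(N) / N    # uniform weights

(α_c, α_s), (x_c, x_s), (ranges_x,) = clusterize(α, x, scale=0.05)
# α_c[k] and x_c[k] summarize the sorted slices
# α_s[ranges_x[k, 0]:ranges_x[k, 1]] and x_s[ranges_x[k, 0]:ranges_x[k, 1], :].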
Code Example #2
    def _Gauss_block_sparse_pre(self, x: torch.Tensor, y: torch.Tensor, K_ij: LazyTensor):
        '''
        Helper function to preprocess data for the block-sparse reduction
        of the Gaussian kernel.

        Args:
            x[torch.Tensor], y[torch.Tensor] = point clouds giving rise to the Gaussian kernel K(x, y)
            K_ij[LazyTensor] = symbolic representation of K(x, y)
            self.eps[float] = size of the square bins
        Returns:
            K_ij[LazyTensor] = symbolic representation of K(x, y) with
                               its block-sparse ranges set
        '''
        if x.shape[1] < 4 or y.shape[1] < 4:
            # Labels for low dimensions:
            x_labels = grid_cluster(x, self.eps)
            y_labels = grid_cluster(y, self.eps)
            # Range and centroid per class:
            x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
            y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
        else:
            # Labels for higher dimensions:
            x_labels, x_centroids = self._KMeans(x)
            y_labels, y_centroids = self._KMeans(y)
            # Compute ranges:
            x_ranges = cluster_ranges(x_labels)
            y_ranges = cluster_ranges(y_labels)

        # Sort the points so that clusters are contiguous in memory:
        x, x_labels = sort_clusters(x, x_labels)
        y, y_labels = sort_clusters(y, y_labels)
        # Compute a coarse Boolean mask:
        D = torch.sum((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2, 2)
        keep = D < self.mask_radius**2
        # mask -> set of integer tensors
        ranges_ij = from_matrix(x_ranges, y_ranges, keep)
        K_ij.ranges = ranges_ij  # block-sparsity pattern

        return K_ij
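
# A minimal sketch of the symbolic Gaussian kernel that this helper expects
# as K_ij (the point clouds and the bandwidth sigma are hypothetical):
import torch
from pykeops.torch import LazyTensor

x = torch.randn(10000, 3)
y = torch.randn(10000, 3)
x_i = LazyTensor(x[:, None, :])  # (N, 1, 3) points, indexed by "i"
y_j = LazyTensor(y[None, :, :])  # (1, M, 3) points, indexed by "j"
sigma = 0.1                      # hypothetical kernel bandwidth
K_ij = (-((x_i - y_j) ** 2).sum(-1) / (2 * sigma**2)).exp()
# _Gauss_block_sparse_pre(x, y, K_ij) would then attach the block-sparse
# ranges to this symbolic kernel, using self.eps and self.mask_radius.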
Code Example #3
    def kneighbors(self, y):
        if use_cuda:
            torch.cuda.synchronize()
        # Assign each query point to its nearest centroid:
        d = ((y.unsqueeze(1) - self.c.unsqueeze(0))**2).sum(-1)
        y_labels = torch.argmin(d, dim=1)

        # Compute the cluster ranges of the query points:
        y_ranges, _, _ = cluster_ranges_centroids(y, y_labels)

        # Make the query clusters contiguous in memory:
        y, y_labels = sort_clusters(y, y_labels)

        # Only compare the pairs of clusters flagged in the Boolean mask self.keep:
        ranges_ij = from_matrix(self.x_ranges, y_ranges, self.keep)

        y_LT = LazyTensor(y.unsqueeze(0))      # queries as a "j"-indexed variable
        D_ij = ((y_LT - self.x)**2).sum(-1)    # symbolic matrix of squared distances
        D_ij.ranges = ranges_ij                # block-sparsity pattern
        return D_ij.argKmin(K=self.k, axis=1)
Code Example #4
    def fit(self, x, use_torch=True, clusters=50, a=5):

        cl, c = KMeans(x, clusters)

        self.c = c
        # Update the cluster assignment:
        if use_torch:
            d = ((x.unsqueeze(1) - c.unsqueeze(0))**2).sum(-1)
            self.cl = torch.argmin(d, dim=1)
        else:
            self.cl = k_argmin(x, c)
        if use_cuda:
            torch.cuda.synchronize()

        # Get the KNN graph of the clusters:
        if use_torch:
            self.ncl = k_argmin_torch(c, c, k=a)
        else:
            c1 = LazyTensor(c.unsqueeze(1))
            c2 = LazyTensor(c.unsqueeze(0))
            d = ((c1 - c2)**2).sum(-1)
            self.ncl = d.argKmin(K=a, dim=1)  # the `a` nearest clusters

        # Get the ranges and centroids:
        self.x_ranges, _, _ = cluster_ranges_centroids(x, self.cl)

        x, x_labels = sort_clusters(x, self.cl)  # sort the dataset to match the ranges
        self.x = LazyTensor(x.unsqueeze(1))  # store the sorted dataset

        # Build the coarse Boolean mask: keep[i, j] = True iff cluster j
        # is among the `a` nearest neighbours of cluster i:
        r = torch.arange(clusters).repeat(a, 1).T.reshape(-1).long()
        self.keep = torch.zeros([clusters, clusters], dtype=torch.bool)
        self.keep[r, self.ncl.flatten()] = True

        return self
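
# A minimal usage sketch of the fit / kneighbors pair above, assuming both
# methods live on a hypothetical IVF-style class (here called `IVF`) whose
# constructor stores the number of neighbours as self.k:
import torch

x = torch.randn(10000, 3)      # hypothetical dataset
y = torch.randn(10000, 3)      # hypothetical query set

knn = IVF(k=10)                # hypothetical constructor
knn.fit(x, clusters=50, a=5)   # K-means clustering + KNN graph of the centroids
indices = knn.kneighbors(y)    # block-sparse argKmin, restricted to the
                               # cluster pairs flagged in knn.keep
# Note that fit() sorts the dataset: the sorted() helper below puts other
# arrays in the same cluster-contiguous order.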
Code Example #5
    def sorted(self, x, labels=None):
        if labels is None:
            labels = self.cl
        x, _ = sort_clusters(x, labels)
        return x
Code Example #6
start = time.time()
x_ranges, x_centroids, _ = cluster_ranges_centroids(x, x_labels)
y_ranges, y_centroids, _ = cluster_ranges_centroids(y, y_labels)
if use_cuda:
    torch.cuda.synchronize()
end = time.time()
print("Compute ranges+centroids : {:.4f}s".format(end - start))

###############################################
# Finally, we can **sort** our points according to their
# labels, making sure that **all clusters are stored contiguously in memory**:

from pykeops.torch.cluster import sort_clusters

start = time.time()
x, x_labels = sort_clusters(x, x_labels)
y, y_labels = sort_clusters(y, y_labels)
if use_cuda:
    torch.cuda.synchronize()
end = time.time()
print("Sort the points          : {:.4f}s".format(end - start))

####################################################################
# Cluster-Cluster binary mask
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# The key idea behind KeOps's block-sparsity mode
# is that as soon as data points are sorted,
# **we can manage the reduction scheme through a small, coarse boolean mask**
# whose values encode whether or not we should perform computations
# at a finer scale.
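
####################################################################
# As a minimal sketch (reusing the centroids and ranges computed above,
# with a hypothetical truncation radius ``sigma``), this coarse mask can
# be built and turned into KeOps reduction ranges as follows:

from pykeops.torch.cluster import from_matrix

# (K_x, K_y) matrix of squared distances between cluster centroids:
D = ((x_centroids[:, None, :] - y_centroids[None, :, :]) ** 2).sum(2)
sigma = 0.05                   # hypothetical truncation radius
keep = D < (4 * sigma) ** 2    # coarse Boolean mask over cluster pairs
ranges_ij = from_matrix(x_ranges, y_ranges, keep)  # integer ranges for KeOps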
Code Example #7
def kernel_multiscale(α,
                      x,
                      β,
                      y,
                      blur=0.05,
                      kernel=None,
                      name=None,
                      truncate=5,
                      diameter=None,
                      cluster_scale=None,
                      verbose=False,
                      **kwargs):

    if truncate is None or name == "energy":
        return kernel_online(α,
                             x,
                             β,
                             y,
                             blur=blur,
                             kernel=kernel,
                             truncate=truncate,
                             name=name,
                             **kwargs)

    # Renormalize our point cloud so that blur = 1:
    kernel, x, y = kernel_preprocess(kernel, name, x, y, blur)

    # Don't forget to normalize the diameter too!
    if cluster_scale is None:
        D = x.shape[-1]
        if diameter is None:
            diameter = max_diameter(x.view(-1, D), y.view(-1, D))
        else:
            diameter = diameter / blur
        cluster_scale = diameter / (np.sqrt(D) * 2000**(1 / D))

    # Put our points in cubic clusters:
    cell_diameter = cluster_scale * np.sqrt(x.shape[1])
    x_lab = grid_cluster(x, cluster_scale)
    y_lab = grid_cluster(y, cluster_scale)

    # Compute the ranges and centroids of each cluster:
    ranges_x, x_c, α_c = cluster_ranges_centroids(x, x_lab, weights=α)
    ranges_y, y_c, β_c = cluster_ranges_centroids(y, y_lab, weights=β)

    if verbose:
        print("{}x{} clusters, computed at scale = {:2.3f}".format(
            len(x_c), len(y_c), cluster_scale))

    # Sort the clusters, making them contiguous in memory:
    (α, x), x_lab = sort_clusters((α, x), x_lab)
    (β, y), y_lab = sort_clusters((β, y), y_lab)

    with torch.no_grad():  # Compute our block-sparse reduction ranges:
        # Compute pairwise distances between clusters:
        C_xx = squared_distances(x_c, x_c)
        C_yy = squared_distances(y_c, y_c)
        C_xy = squared_distances(x_c, y_c)

        # Compute the boolean masks:
        keep_xx = (C_xx <= (truncate + cell_diameter)**2)
        keep_yy = (C_yy <= (truncate + cell_diameter)**2)
        keep_xy = (C_xy <= (truncate + cell_diameter)**2)

        # Compute the KeOps reduction ranges:
        ranges_xx = from_matrix(ranges_x, ranges_x, keep_xx)
        ranges_yy = from_matrix(ranges_y, ranges_y, keep_yy)
        ranges_xy = from_matrix(ranges_x, ranges_y, keep_xy)

    return kernel_keops(kernel,
                        α,
                        x,
                        β,
                        y,
                        ranges_xx=ranges_xx,
                        ranges_yy=ranges_yy,
                        ranges_xy=ranges_xy)
Code Example #8
File: transfer_labels.py  Project: wzm2256/geomloss
X_i[:, :, 0, :] *= gamma
X_i[:, :, -1, :] *= gamma
Y_j[:, :, 0, :] *= gamma
Y_j[:, :, -1, :] *= gamma

###############################################################################
# Optimizing performances
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Contiguous memory accesses are critical for performance on the GPU.
#

from pykeops.torch.cluster import sort_clusters, cluster_ranges

ranges_j = cluster_ranges(labels_j)  # Ranges for all clusters
Y_j, labels_j = sort_clusters(Y_j, labels_j)  # Make all clusters contiguous in memory

C = len(ranges_j)  # Number of classes

if C != labels_j.max() + 1:
    raise ValueError(
        f"Expected {labels_j.max() + 1} clusters in the atlas, but found {C} ranges."
    )

for j, (start_j, end_j) in enumerate(ranges_j):
    if start_j >= end_j:
        raise ValueError(f"The {j}-th cluster of the atlas seems to be empty.")

###############################################################################
# Each fiber is sampled with 20 points in R^3.
# Thus, one tractogram is a matrix of size n x 60, where n is the number of fibers.
# The atlas is labelled, which means that each fiber belongs to a cluster.
# This is summarized by the vector labels_j of size n x 1: labels_j[i] is the label of fiber i.
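#
# As a minimal sketch of this layout (all sizes below are hypothetical):

import torch

n, C = 1000, 50                                     # hypothetical fiber and cluster counts
tractogram = torch.randn(n, 20, 3).reshape(n, 60)   # 20 points in R^3 -> one row of 60 coordinates per fiber
labels_j = torch.randint(0, C, (n,))                # labels_j[i] = cluster label of fiber i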
Code Example #9
def kernel_multiscale(α,
                      x,
                      β,
                      y,
                      blur=0.05,
                      kernel=None,
                      name=None,
                      truncate=5,
                      diameter=None,
                      cluster_scale=None,
                      potentials=False,
                      verbose=False,
                      **kwargs):

    if truncate is None or name == "energy":
        return kernel_online(α.unsqueeze(0),
                             x.unsqueeze(0),
                             β.unsqueeze(0),
                             y.unsqueeze(0),
                             blur=blur,
                             kernel=kernel,
                             truncate=truncate,
                             name=name,
                             potentials=potentials,
                             **kwargs)

    # Renormalize our point cloud so that blur = 1:
    # Center the point clouds just in case, to prevent numeric overflows:
    center = (x.mean(-2, keepdim=True) + y.mean(-2, keepdim=True)) / 2
    x, y = x - center, y - center
    x_ = x / blur
    y_ = y / blur

    # Don't forget to normalize the diameter too!
    if cluster_scale is None:
        D = x.shape[-1]
        if diameter is None:
            diameter = max_diameter(x_.view(-1, D), y_.view(-1, D))
        else:
            diameter = diameter / blur
        cluster_scale = diameter / (np.sqrt(D) * 2000**(1 / D))

    # Put our points in cubic clusters:
    cell_diameter = cluster_scale * np.sqrt(x_.shape[-1])
    x_lab = grid_cluster(x_, cluster_scale)
    y_lab = grid_cluster(y_, cluster_scale)

    # Compute the ranges and centroids of each cluster:
    ranges_x, x_c, α_c = cluster_ranges_centroids(x_, x_lab, weights=α)
    ranges_y, y_c, β_c = cluster_ranges_centroids(y_, y_lab, weights=β)

    if verbose:
        print("{}x{} clusters, computed at scale = {:2.3f}".format(
            len(x_c), len(y_c), cluster_scale))

    # Sort the clusters, making them contiguous in memory:
    (α, x), x_lab = sort_clusters((α, x), x_lab)
    (β, y), y_lab = sort_clusters((β, y), y_lab)

    with torch.no_grad():  # Compute our block-sparse reduction ranges:
        # Compute pairwise distances between clusters:
        C_xx = squared_distances(x_c, x_c)
        C_yy = squared_distances(y_c, y_c)
        C_xy = squared_distances(x_c, y_c)

        # Compute the boolean masks:
        keep_xx = C_xx <= (truncate + cell_diameter)**2
        keep_yy = C_yy <= (truncate + cell_diameter)**2
        keep_xy = C_xy <= (truncate + cell_diameter)**2

        # Compute the KeOps reduction ranges:
        ranges_xx = from_matrix(ranges_x, ranges_x, keep_xx)
        ranges_yy = from_matrix(ranges_y, ranges_y, keep_yy)
        ranges_xy = from_matrix(ranges_x, ranges_y, keep_xy)

    return kernel_loss(
        α,
        x,
        β,
        y,
        blur=blur,
        kernel=kernel,
        name=name,
        potentials=potentials,
        use_keops=True,
        ranges_xx=ranges_xx,
        ranges_yy=ranges_yy,
        ranges_xy=ranges_xy,
    )