Example #1
    def __init__(self, kernel_width=None):
        self.kernel_type = 'keops'
        super().__init__(kernel_width)

        self.gaussian_convolve = generic_sum(
            "Exp(-G*SqDist(X,Y)) * P",
            "O = Vx(" + str(Settings().dimension) + ")", "G = Pm(1)",
            "X = Vx(" + str(Settings().dimension) + ")",
            "Y = Vy(" + str(Settings().dimension) + ")",
            "P = Vy(" + str(Settings().dimension) + ")")

        self.varifold_convolve = generic_sum(
            "Exp(-(WeightedSqDist(G, X, Y))) * Pow((Nx, Ny), 2) * P",
            "O = Vx(1)", "G = Pm(1)",
            "X = Vx(" + str(Settings().dimension) + ")",
            "Y = Vy(" + str(Settings().dimension) + ")",
            "Nx = Vx(" + str(Settings().dimension) + ")",
            "Ny = Vy(" + str(Settings().dimension) + ")", "P = Vy(1)")

        self.gaussian_convolve_gradient_x = generic_sum(
            "(Px, Py) * Exp(-G*SqDist(X,Y)) * (X-Y) * ",
            "O = Vx(" + str(Settings().dimension) + ")", "G = Pm(1)",
            "X = Vx(" + str(Settings().dimension) + ")",
            "Y = Vy(" + str(Settings().dimension) + ")",
            "Px = Vx(" + str(Settings().dimension) + ")",
            "Py = Vy(" + str(Settings().dimension) + ")")
Example #2
def kernel_keops(kernel,
                 α,
                 x,
                 β,
                 y,
                 potentials=False,
                 ranges_xx=None,
                 ranges_yy=None,
                 ranges_xy=None):

    D = x.shape[1]
    kernel_conv = generic_sum(
        "(" + kernel + " * B)",  # Formula
        "A = Vi(1)",  # Output:    a_i
        "X = Vi({})".format(D),  # 1st input: x_i
        "Y = Vj({})".format(D),  # 2nd input: y_j
        "B = Vj(1)")  # 3rd input: b_j

    a_x = kernel_conv(double_grad(x),
                      x.detach(),
                      α.detach().view(-1, 1),
                      ranges=ranges_xx)
    b_y = kernel_conv(double_grad(y),
                      y.detach(),
                      β.detach().view(-1, 1),
                      ranges=ranges_yy)
    b_x = kernel_conv(x, y, β.view(-1, 1), ranges=ranges_xy)

    if potentials:
        a_y = kernel_conv(y, x, α.view(-1, 1), ranges=swap_axes(ranges_xy))
        return a_x - b_x, b_y - a_y

    else:  # Return the Kernel norm. N.B.: we assume that 'kernel' is symmetric:
        return .5 * scal( double_grad(α), a_x ) \
             + .5 * scal( double_grad(β), b_y )  -  scal( α, b_x )
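
# For reference, the quantity returned in the non-potentials branch is (a sketch,
# assuming that the kernel k is symmetric) the squared kernel / MMD norm:
#
# .. math::
#   \tfrac{1}{2}\|\alpha-\beta\|_k^2 ~=~ \tfrac{1}{2}\langle \alpha, K_{xx}\alpha\rangle
#   ~+~ \tfrac{1}{2}\langle \beta, K_{yy}\beta\rangle ~-~ \langle \alpha, K_{xy}\beta\rangle,
#
# with a_x = K_{xx}α, b_y = K_{yy}β and b_x = K_{xy}β as computed above.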
Example #3
def kernel_keops(kernel,
                 α,
                 x,
                 β,
                 y,
                 ranges_xx=None,
                 ranges_yy=None,
                 ranges_xy=None):

    D = x.shape[1]
    kernel_conv = generic_sum(
        "(" + kernel + " * B)",  # Formula
        "A = Vx(1)",  # Output:    a_i
        "X = Vx({})".format(D),  # 1st input: x_i
        "Y = Vy({})".format(D),  # 2nd input: y_j
        "B = Vy(1)")  # 3rd input: b_j

    a_i = kernel_conv(double_grad(x),
                      x.detach(),
                      α.detach().view(-1, 1),
                      ranges=ranges_xx)
    b_j = kernel_conv(double_grad(y),
                      y.detach(),
                      β.detach().view(-1, 1),
                      ranges=ranges_yy)
    b_i = kernel_conv(x, y, β.view(-1, 1), ranges=ranges_xy)

    # N.B.: we assume that 'kernel' is symmetric:
    return .5 * scal( double_grad(α), a_i ) \
         + .5 * scal( double_grad(β), b_j )  -  scal( α, b_i )
Example #4
def Projection_ops(p, ε, x_i, y_j):
    "Normalization weights for the Barycenter ops."
    if backend == "keops":  # Memory-efficient GPU implementation
        # We create a KeOps GPU routine...
        if p == 1: formula = "Exp(Fj + Gi - (Sqrt(SqDist(Xi,Yj))/ E))"
        elif p == 2: formula = "Exp(Fj + Gi -      (SqDist(Xi,Yj) / E))"
        else:
            formula = "Exp( Fj + Gi - (Powf(SqDist(Xi,Yj),R)/ E) )"
            raise NotImplementedError(
                "I should fix the derivative at 0 of Powf, in KeOps's core.")
        D = x_i.shape[1]  # Dimension of the ambient space (typically 2 or 3)
        routine = generic_sum(
            formula,
            "outi = Vx(1)",  # Formula, output...
            # and input variables : ε, x_i, y_j, f_j, p/2 given with their respective dimensions
            "E = Pm(1)",
            "Xi = Vx({})".format(D),
            "Yj = Vy({})".format(D),
            "Fj = Vy(1)",
            "Gi = Vx(1)",
            "R=Pm(1)")

        # Before wrapping it up in a simple pair of operators - don't forget the minus!
        ε, r = torch.Tensor([ε]).type_as(x_i), torch.Tensor([p / 2]).type_as(x_i)
        P_x = lambda f_i, g_j: routine(ε, y_j, x_i, f_i, g_j, r)
        P_y = lambda f_j, g_i: routine(ε, x_i, y_j, f_j, g_i, r)
        return P_x, P_y

    elif backend == "pytorch":
        raise NotImplementedError()
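
# For reference, a sketch of what the returned operators compute (following the
# conventions spelled out in the Barycenters_ops docstring further down this page):
#
# .. math::
#   [P_y(f_j, g_i)]_i ~=~ \sum_j \exp\big( f_j + g_i - \|x_i - y_j\|^p / \varepsilon \big),
#
# and symmetrically for P_x, with the roles of the x_i's and y_j's swapped.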
Example #5
def conv(k, x_i, y_j, β_j) :
    k_name, s = k
    if   backend == "keops" : # Memory-efficient GPU implementation : ONline map-reduce
        # We create a KeOps GPU routine...
        s2v = lambda g : torch.Tensor([g]).type_as(x_i)
        if   k_name == "energy"   : formula = " - Sqrt(SqDist(Xi,Yj))  * Bj" ; g = s2v(1.)
        elif k_name == "gaussian" : formula = "Exp( -G*SqDist(Xi,Yj) ) * Bj" ; g = s2v(1/s**2)
        elif k_name == "laplacian": formula = "Exp( -G*Sqrt(SqDist(Xi,Yj)) ) * Bj" ; g = s2v(1/s)
        else :                      raise NotImplementedError()

        D = x_i.shape[1] # Dimension of the ambient space (typically 2 or 3)
        routine = generic_sum( formula, "out_i = Vx(1)", # Formula, output...
            # and input variables : g, x_i, y_j, β_j, given with their respective dimensions
            "G = Pm(1)", "Xi = Vx({})".format(D), "Yj = Vy({})".format(D), "Bj = Vy(1)")

        # ...Before applying it to our data:
        return routine(g, x_i, y_j, β_j)

    elif backend == "pytorch" : # Naive matrix-vector implementation : OFFline map-reduce
        XmY2 = ( (x_i.unsqueeze(1) - y_j.unsqueeze(0)) ** 2).sum(2)
        if   k_name == "energy"   : K =  -XmY2.sqrt()
        elif k_name == "gaussian" : K = (-XmY2 / s**2).exp()
        elif k_name == "laplacian": K = (-XmY2.sqrt() / s).exp()
        else :                      raise NotImplementedError()
        return K @ β_j
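
# A minimal usage sketch for conv (hypothetical data; assumes that torch is
# imported and that the module-level `backend` flag is set):
#
#   backend = "pytorch"
#   x_i, y_j = torch.randn(1000, 3), torch.randn(2000, 3)
#   β_j = torch.randn(2000, 1)
#   Kβ_i = conv(("gaussian", .5), x_i, y_j, β_j)  # (1000, 1) Gaussian convolution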
Example #6
def gaussianconv_keops(x, y, b):
    D = x.shape[1]  # Ambient dimension, read from the input
    fun = generic_sum("Exp(-SqDist(X,Y)) * B",  # Formula
                      "A = Vi(1)",              # Output
                      "X = Vi({})".format(D),   # 1st argument
                      "Y = Vj({})".format(D),   # 2nd argument
                      "B = Vj(1)")              # 3rd argument
    backend = 'GPU' if use_cuda else 'CPU'
    return fun(x, y, b, backend=backend)
Example #7
    def __init__(self,
                 kernel_width=None,
                 device=default.deformation_kernel_device,
                 **kwargs):

        if device.lower() == 'cuda':
            device = 'GPU'

        super().__init__('keops', kernel_width, device)

        self.gamma = 1. / default.tensor_scalar_type([self.kernel_width**2])

        self.gaussian_convolve = []
        self.point_cloud_convolve = []
        self.varifold_convolve = []
        self.gaussian_convolve_gradient_x = []

        for dimension in [2, 3]:
            self.gaussian_convolve.append(
                generic_sum("Exp(-G*SqDist(X,Y)) * P",
                            "O = Vx(" + str(dimension) + ")", "G = Pm(1)",
                            "X = Vx(" + str(dimension) + ")",
                            "Y = Vy(" + str(dimension) + ")",
                            "P = Vy(" + str(dimension) + ")"))

            self.point_cloud_convolve.append(
                generic_sum("Exp(-G*SqDist(X,Y)) * P", "O = Vx(1)",
                            "G = Pm(1)", "X = Vx(" + str(dimension) + ")",
                            "Y = Vy(" + str(dimension) + ")", "P = Vy(1)"))

            self.varifold_convolve.append(
                generic_sum(
                    "Exp(-(WeightedSqDist(G, X, Y))) * Square((Nx|Ny)) * P",
                    "O = Vx(1)", "G = Pm(1)", "X = Vx(" + str(dimension) + ")",
                    "Y = Vy(" + str(dimension) + ")",
                    "Nx = Vx(" + str(dimension) + ")",
                    "Ny = Vy(" + str(dimension) + ")", "P = Vy(1)"))

            self.gaussian_convolve_gradient_x.append(
                generic_sum("(Px|Py) * Exp(-G*SqDist(X,Y)) * (X-Y)",
                            "O = Vx(" + str(dimension) + ")", "G = Pm(1)",
                            "X = Vx(" + str(dimension) + ")",
                            "Y = Vy(" + str(dimension) + ")",
                            "Px = Vx(" + str(dimension) + ")",
                            "Py = Vy(" + str(dimension) + ")"))
def gaussianconv_keops(x, y, b, backend="GPU", **kwargs):
    """(B,N,D), (B,N,D), (B,N,1) -> (B,N,1)"""
    fun = generic_sum(
        "Exp(-SqDist(X,Y)) * B",  # Formula
        "A = Vi(1)",  # Output
        "X = Vi({})".format(D),  # 1st argument
        "Y = Vj({})".format(D),  # 2nd argument
        "B = Vj(1)",  # 3rd argument
    )
    return fun(x, y, b, backend=backend)
Example #9
def expscalprod_keops_nochunks(x, y, b):
    D = x.shape[1]
    fun = generic_sum(
        "Exp(X|Y) * B",  # Formula
        "A = Vi(1)",  # Output
        "X = Vi({})".format(D),  # 1st argument
        "Y = Vj({})".format(D),  # 2nd argument
        "B = Vj(1)",  # 3rd argument
        enable_chunks=False)
    backend = 'GPU' if use_cuda else 'CPU'
    return fun(x, y, b, backend=backend)
Example #10
def gaussianconv_keops(x, y, b):
    D = x.shape[1]
    fun = generic_sum(
        "Exp(X|Y) * B",  # Formula
        "A = Vi(1)",  # Output
        "X = Vi({})".format(D),  # 1st argument
        "Y = Vj({})".format(D),  # 2nd argument
        "B = Vj(1)")  # 3rd argument
    backend = 'GPU' if use_cuda else 'CPU'
    ex = (-(x * x).sum(-1)).exp()[:, None]
    ey = (-(y * y).sum(-1)).exp()[:, None]
    return ex * fun(2 * x, y, b * ey, backend=backend)
Example #11
def gaussianconv_keops(x, y, b, backend="GPU", **kwargs):
    D = x.shape[-1]
    fun = generic_sum(
        "Exp(X|Y) * B",  # Formula
        "A = Vi(1)",  # Output
        "X = Vi({})".format(D),  # 1st argument
        "Y = Vj({})".format(D),  # 2nd argument
        "B = Vj(1)",  # 3rd argument
    )
    ex = (-(x * x).sum(-1)).exp()[:, :, None]
    ey = (-(y * y).sum(-1)).exp()[:, :, None]
    return ex * fun(2 * x, y, b * ey, backend=backend)
Example #12
def gaussianconv_keops_nochunks(x, y, b):
    D = x.shape[1]
    fun = generic_sum(
        "Exp(X|Y) * B",  # Formula
        "A = Vi(1)",  # Output
        "X = Vi({})".format(D),  # 1st argument
        "Y = Vj({})".format(D),  # 2nd argument
        "B = Vj(1)",  # 3rd argument
        enable_chunks=False,
    )
    backend = "GPU" if use_cuda else "CPU"
    ex = (-(x * x).sum(-1)).exp()[:, None]
    ey = (-(y * y).sum(-1)).exp()[:, None]
    return ex * fun(2 * x, y, b * ey, backend=backend)
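
# The three variants above all rely on the same factorization of the Gaussian
# kernel: exp(-|x-y|^2) = exp(-|x|^2) * exp(2<x,y>) * exp(-|y|^2), which lets us
# use the cheaper "Exp(X|Y)" formula plus two pointwise rescalings. A minimal
# sketch that checks this identity in plain PyTorch (hypothetical data, no
# KeOps required):
#
#   x, y, b = torch.randn(5, 3), torch.randn(4, 3), torch.randn(4, 1)
#   K = (-((x[:, None, :] - y[None, :, :]) ** 2).sum(-1)).exp()
#   ex = (-(x * x).sum(-1)).exp()[:, None]
#   ey = (-(y * y).sum(-1)).exp()[:, None]
#   assert torch.allclose(K @ b, ex * ((2 * x @ y.t()).exp() @ (b * ey)), atol=1e-5)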
Example #13
def Barycenters_ops(p, ε, x_i, y_j):
    """
    Given:
    - an exponent p = 1 or 2
    - a regularization strength ε > 0
    - point clouds x_i and y_j, encoded as N-by-D and M-by-D torch arrays,

    Returns a pair of routines R_x, R_y such that
      [R_x(f_i, g_j)]_j = sum_i exp( f_i + g_j - |x_i-y_j|^p / ε ) * (x_i-y_j)
      [R_y(f_j, g_i)]_i = sum_j exp( f_j + g_i - |x_i-y_j|^p / ε ) * (y_j-x_i)

    This may look like a strange level of abstraction, but it is the most convenient way of
    working with KeOps and vanilla PyTorch (with a pre-computed cost matrix) at the same time.
    """
    if backend == "keops":  # Memory-efficient GPU implementation
        # We create a KeOps GPU routine...
        if p == 1:
            formula = "Exp(Fj + Gi - (Sqrt(SqDist(Xi,Yj))/ E)) * (Yj-Xi)"
        elif p == 2:
            formula = "Exp(Fj + Gi -      (SqDist(Xi,Yj) / E)) * (Yj-Xi)"
        else:
            formula = "Exp( Fj + Gi - (Powf(SqDist(Xi,Yj),R)/ E) ) * (Yj-Xi)"
            raise NotImplementedError(
                "I should fix the derivative at 0 of Powf, in KeOps's core.")
        D = x_i.shape[1]  # Dimension of the ambient space (typically 2 or 3)
        routine = generic_sum(
            formula,
            "outi = Vx({})".format(D),  # Formula, output...
            # and input variables : ε, x_i, y_j, f_j, p/2 given with their respective dimensions
            "E = Pm(1)",
            "Xi = Vx({})".format(D),
            "Yj = Vy({})".format(D),
            "Fj = Vy(1)",
            "Gi = Vx(1)",
            "R=Pm(1)")

        # Before wrapping it up in a simple pair of operators - don't forget the minus!
        ε, r = torch.Tensor([ε]).type_as(x_i), torch.Tensor([p / 2]).type_as(x_i)
        R_x = lambda f_i, g_j: routine(ε, y_j, x_i, f_i, g_j, r)
        R_y = lambda f_j, g_i: routine(ε, x_i, y_j, f_j, g_i, r)
        return R_x, R_y

    elif backend == "pytorch":
        raise NotImplementedError()
Example #14
def my_formula(p, x, y, backend="auto"):
    """
    Applies a custom formula on the torch variables P, X and Y.
    Two backends are provided, so that we can check the correctness
    of both implementations.
    """
    if backend == "pytorch":  # Vanilla PyTorch implementation ===================
        scals = (x @ y.t())**2  # Memory-intensive computation!
        a = p[0] * scals.sum(1).view(-1,1) * x \
          + p[1] * (scals @ y)
        return a

    else:  # KeOps implementation ================================================
        # We now expose the low-level syntax of KeOps.
        # The library relies on vector "Variables" which can be either:
        # - indexed by "i" ("x" variables, category 0)
        # - indexed by "j" ("y" variables, category 1)
        # - constant across the reduction ("parameters", category 2)
        #
        # First of all, we must define a "who's who" list of the variables used,
        # by specifying their categories, index in the arguments' list, and dimensions:
        types = [
            "A = Vx(" + str(x.shape[1]) + ")",  # output,       indexed by i, dim D
            "P = Pm(2)",                        # 1st argument, a parameter, dim 2
            "X = Vx(" + str(x.shape[1]) + ")",  # 2nd argument, indexed by i, dim D
            "Y = Vy(" + str(y.shape[1]) + ")",  # 3rd argument, indexed by j, dim D
        ]

        # The actual formula:
        # a_i   =   sum_j (<x_i,y_j>**2) * ( p[0]*x_i  +  p[1]*y_j )
        formula = "Pow( (X|Y) , 2) * ( (Elem(P,0) * X) + (Elem(P,1) * Y) )"

        my_routine = generic_sum(formula, *types)
        a = my_routine(p, x, y, backend=backend)
        return a
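
# A quick cross-check of the two backends (hypothetical data; assumes torch is
# imported and KeOps is installed):
#
#   p, x, y = torch.randn(2), torch.randn(1000, 3), torch.randn(2000, 3)
#   assert torch.allclose(my_formula(p, x, y, backend="pytorch"),
#                         my_formula(p, x, y, backend="auto"), atol=1e-4)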
Example #15
# We should simply come back to the expression of :math:`\pi_{i,j}`
# and write:
#
# .. math::
#   \text{Lab}_i ~&=~ \sum_{j=1}^M \exp \tfrac{1}{\varepsilon}[f_i+ g_j - \text{C}(x_i,y_j)] \cdot \beta_j \ell_j \\
#   &=~ \frac{1}{M} \sum_{j=1}^M  \exp \tfrac{1}{\varepsilon}[f_i+ g_j - \tfrac{1}{2}\|x_i-y_j\|^2] \cdot \ell_j.
#

from pykeops.torch import generic_sum

# Define our KeOps CUDA kernel:
transfer = generic_sum(
    "Exp( (F_i + G_j - IntInv(2)*SqDist(X_i,Y_j)) / E ) * L_j",  # See the formula above
    "Lab = Vi(3)",  # Output:  one vector of size 3 per line
    "E   = Pm(1)",  # 1st arg: a scalar parameter, the temperature
    "X_i = Vi(2)",  # 2nd arg: one 2d-point per line
    "Y_j = Vj(2)",  # 3rd arg: one 2d-point per column
    "F_i = Vi(1)",  # 4th arg: one scalar value per line
    "G_j = Vj(1)",  # 5th arg: one scalar value per column
    "L_j = Vj(3)")  # 6th arg: one vector of size 3 per column

# And apply it on the data (KeOps is pretty picky on the input shapes...):
labels_i = transfer(
    torch.Tensor([blur**2]).type(dtype), X_i, Y_j, F_i.view(-1, 1),
    G_j.view(-1, 1), l_j) / M

###############################################
# That's it! We may now display our target point cloud :math:`(x_i)`
# with its new set of labels:

# sphinx_gallery_thumbnail_number = 2
Example #16
def benchmark(bench_name,
              N,
              dev,
              backend,
              loops=10,
              enable_GC=True,
              fidelity=None):

    importlib.reload(torch)

    device = torch.device(dev)
    x_i = torch.randn(N,
                      D,
                      dtype=torch.float32,
                      device=device,
                      requires_grad=True)
    y_j = torch.randn(N, D, dtype=torch.float32, device=device)
    α_i = torch.randn(N, 1, dtype=torch.float32, device=device)
    β_j = torch.randn(N, 1, dtype=torch.float32, device=device)

    α_i = α_i.abs()
    β_j = β_j.abs()
    α_i = α_i / α_i.sum()
    β_j = β_j / β_j.sum()

    s2v = lambda x: torch.tensor([x], dtype=torch.float32, device=device)

    def scal(α, f):
        return torch.dot(α.view(-1), f.view(-1))

    if bench_name == "energy_distance":
        keops_conv = generic_sum(
            "Sqrt(SqDist(Xi,Yj))* Bj",
            "out_i = Vx(1)",  # Formula, output...
            # and input variables : x_i, y_j, β_j, given with their respective dimensions
            "Xi = Vx({})".format(D),
            "Yj = Vy({})".format(D),
            "Bj = Vy(1)")

        def vanilla_conv(x, y, β):
            XmY2 = ((x.unsqueeze(1) - y.unsqueeze(0))**2).sum(2)
            K = XmY2.sqrt()
            return K @ β

        def bench(α, x, β, y):
            if backend == "GPU_1D":
                conv = keops_conv
            elif backend == "pytorch":
                conv = vanilla_conv
            else:
                raise NotImplementedError()
            cost = scal(α, conv(x, y, β) - .5 * conv(x, x, α)) \
                 - .5 * scal(β, conv(y, y, β))
            cost.backward()
            return cost

        code = '_ = bench(α_i,x_i,β_j,y_j)'
        task = "Energy Distances"

    if bench_name == "LogSumExp":
        keops_lse = generic_logsumexp(
            "Sqrt(SqDist(Xi,Yj))",
            "out_i = Vx(1)",  # Formula, output...
            # and input variables : x_i, y_j, β_j, given with their respective dimensions
            "Xi = Vx({})".format(D),
            "Yj = Vy({})".format(D))

        def lse(v_ij):
            """[lse(v_ij)]_i = log sum_j exp(v_ij), with numerical accuracy."""
            V_i = torch.max(v_ij, 1)[0].view(-1, 1)
            return V_i + (v_ij - V_i).exp().sum(1).log().view(-1, 1)

        def vanilla_lse(x, y):
            XmY2 = ((x.unsqueeze(1) - y.unsqueeze(0))**2).sum(2)
            K = XmY2.sqrt()
            return lse(K)

        def bench(x, y):
            if backend == "GPU_1D":
                return keops_lse(x, y)
            elif backend == "pytorch":
                return vanilla_lse(x, y)
            else:
                raise NotImplementedError()

        code = '_ = bench(x_i,y_j)'
        task = "LSEs"

    elif bench_name == "fidelities":

        from divergences import kernel_divergence, regularized_ot, hausdorff_divergence, sinkhorn_divergence

        if fidelity == "energy_distance":
            params = ("energy", None)
            code = "c = kernel_divergence(α_i,x_i, β_j,y_j, k=params ) ; c.backward()"

        elif fidelity == "hausdorff":
            params = {
                "p": 1,
                "eps": .1,
                "nits": 3,
                "tol": 0.,
            }
            code = "c = hausdorff_divergence(α_i,x_i, β_j,y_j, **params ) ; c.backward()"

        elif fidelity == "sinkhorn":
            params = {
                "p": 1,
                "eps": .1,
                "nits": (20, 3),
                "assume_convergence": True,  # True in practice; saves us a factor of two
                "tol": 0.,
            }
            code = "c = sinkhorn_divergence(α_i,x_i, β_j,y_j, **params ) ; c.backward()"

        elif fidelity == "sinkhorn_nocv":
            params = {
                "p": 1,
                "eps": .1,
                "nits": (20, 3),
                "assume_convergence": False,
                "tol": 0.,
            }
            code = "c = sinkhorn_divergence(α_i,x_i, β_j,y_j, **params ) ; c.backward()"

        task = "fidelities"

    exec(code, locals())  # First run, to compile the KeOps routines
    import gc
    GC = 'gc.enable();' if enable_GC else 'pass;'
    print("{:3} NxN {}, with N ={:7}: {:3}x".format(loops, task, N, loops),
          end="")

    exec(code, locals())  # Warmup run

    elapsed = timeit.Timer(code, GC, globals=locals(),
                           timer=time.time).timeit(loops)

    print("{:3.6f}s".format(elapsed / loops))
    return elapsed / loops
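
# A hypothetical invocation of the routine above - it assumes that the
# module-level constant D and the relevant imports (torch, timeit, time,
# importlib, pykeops) are in scope:
#
#   avg_time = benchmark("energy_distance", 10000, "cuda", "GPU_1D", loops=10)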
Example #17
#--------------------------------------------------------------#
#                        Kernel                                #
#--------------------------------------------------------------#

formula = "Square(p-a)*Exp(x+y)"
types = [
    "output = Vx(3)",  # The result is indexed by "i", of size 3
    "p = Pm(1)",       # First  arg : parameter,  of size 1 (scalar)
    "a = Vy(1)",       # Second arg : j-variable, of size 1 (scalar)
    "x = Vx(3)",       # Third  arg : i-variable, of size 3
    "y = Vy(3)",       # Fourth arg : j-variable, of size 3
]
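
# The original snippet uses p, a, x and y without defining them; here is a
# hypothetical set of inputs matching the declared shapes (assumes torch is
# imported):
M, N = 300, 500
p = torch.randn(1)     # parameter,  Pm(1)
a = torch.randn(M, 1)  # j-variable, Vy(1)
x = torch.randn(N, 3)  # i-variable, Vx(3)
y = torch.randn(M, 3)  # j-variable, Vy(3)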

start = time.time()

my_routine = generic_sum(formula, *types)
c = my_routine(p, a, x, y, backend="CPU")

# N.B.: If CUDA is available + backend="auto" (or not specified) + the arrays are large enough,
#       KeOps will load the data on the GPU + compute + unload the result back to the CPU,
#       as it is assumed to be more efficient.
#       By specifying backend="CPU", we make sure that the result is computed
#       using a simple C++ for loop.

print("Time to compute the convolution operation on the cpu : ",
      round(time.time() - start, 2), "s")

#--------------------------------------------------------------#
#                        Gradient                              #
#--------------------------------------------------------------#
Example #18
"""
KeOps
=====
"""

import torch
import numpy as np
from time import time

nits = 10
Ns, D = [10000, 100000, 1000000], 3

from pykeops.torch import generic_sum

KP = generic_sum(
    "Exp(-SqDist(X,Y)) * B",  # Formula
    "A = Vi(1)",  # Output
    "X = Vi({})".format(D),  # 1st argument
    "Y = Vj({})".format(D),  # 2nd argument
    "B = Vj(1)")  # 3rd argument

for N in Ns:

    # Generate the data
    x = torch.randn(N, D).cuda()
    y = torch.randn(N, D).cuda()
    p = torch.randn(N, 1).cuda()

    # First run, just in case (triggers the KeOps compilation):
    _ = KP(x, y, p)

    # Timings for KeOps
    start = time()