Example #1
    def test_generic_syntax_double(self):
        ############################################################
        from pykeops.torch import Genred

        aliases = ["p=Pm(1)", "a=Vj(1)", "x=Vi(3)", "y=Vj(3)"]
        formula = "Square(p-a)*Exp(x+y)"
        if pykeops.config.gpu_available:
            backend_to_test = ["auto", "GPU_1D", "GPU_2D", "GPU"]
        else:
            backend_to_test = ["auto"]

        for b in backend_to_test:
            with self.subTest(b=b):
                # Call cuda kernel
                gamma_keops = Genred(formula, aliases, axis=1,
                                     dtype="float64")(self.sigmacd,
                                                      self.gcd,
                                                      self.xcd,
                                                      self.ycd,
                                                      backend=b)
                # Numpy version
                gamma_py = np.sum(
                    (self.sigma - self.g)**2 * np.exp(
                        (self.y.T[:, :, np.newaxis] +
                         self.x.T[:, np.newaxis, :])),
                    axis=1,
                ).T
                # compare output
                self.assertTrue(
                    np.allclose(gamma_keops.cpu().data.numpy(),
                                gamma_py,
                                atol=1e-6))
Example #2
    def test_generic_syntax_float(self):
        ############################################################
        from pykeops.torch import Genred
        aliases = ['p=Pm(1)', 'a=Vj(1)', 'x=Vi(3)', 'y=Vj(3)']
        formula = 'Square(p-a)*Exp(x+y)'
        if pykeops.config.gpu_available:
            backend_to_test = ['auto', 'GPU_1D', 'GPU_2D', 'GPU']
        else:
            backend_to_test = ['auto']

        for b in backend_to_test:
            with self.subTest(b=b):
                # Call cuda kernel
                gamma_keops = Genred(formula, aliases, axis=1,
                                     dtype='float32')(self.sigmac,
                                                      self.gc,
                                                      self.xc,
                                                      self.yc,
                                                      backend=b)
                # Numpy version
                gamma_py = np.sum((self.sigma - self.g)**2 * np.exp(
                    (self.y.T[:, :, np.newaxis] + self.x.T[:, np.newaxis, :])),
                                  axis=1).T
                # compare output
                self.assertTrue(
                    np.allclose(gamma_keops.cpu().data.numpy(),
                                gamma_py,
                                atol=1e-6))
Example #3
    def __init__(self,
                 gpu_mode=default.gpu_mode,
                 kernel_width=None,
                 cuda_type=None,
                 **kwargs):
        super().__init__('keops', gpu_mode, kernel_width)

        if cuda_type is None:
            cuda_type = default.dtype

        self.cuda_type = cuda_type

        self.gamma = 1. / default.tensor_scalar_type([self.kernel_width**2])

        self.gaussian_convolve = []
        self.point_cloud_convolve = []
        self.varifold_convolve = []
        self.gaussian_convolve_gradient_x = []

        for dimension in [2, 3]:
            self.gaussian_convolve.append(
                Genred("Exp(-G*SqDist(X,Y)) * P", [
                    "G = Pm(1)", "X = Vi(" + str(dimension) + ")", "Y = Vj(" +
                    str(dimension) + ")", "P = Vj(" + str(dimension) + ")"
                ],
                       reduction_op='Sum',
                       axis=1,
                       cuda_type=cuda_type))

            self.point_cloud_convolve.append(
                Genred("Exp(-G*SqDist(X,Y)) * P", [
                    "G = Pm(1)", "X = Vi(" + str(dimension) + ")",
                    "Y = Vj(" + str(dimension) + ")", "P = Vj(1)"
                ],
                       reduction_op='Sum',
                       axis=1,
                       cuda_type=cuda_type))

            self.varifold_convolve.append(
                Genred("Exp(-(WeightedSqDist(G, X, Y))) * Square((Nx|Ny)) * P",
                       [
                           "G = Pm(1)", "X = Vi(" + str(dimension) + ")",
                           "Y = Vj(" + str(dimension) + ")",
                           "Nx = Vi(" + str(dimension) + ")",
                           "Ny = Vj(" + str(dimension) + ")", "P = Vj(1)"
                       ],
                       reduction_op='Sum',
                       axis=1,
                       cuda_type=cuda_type))

            self.gaussian_convolve_gradient_x.append(
                Genred("(Px|Py) * Exp(-G*SqDist(X,Y)) * (X-Y)", [
                    "G = Pm(1)", "X = Vi(" + str(dimension) + ")", "Y = Vj(" +
                    str(dimension) + ")", "Px = Vi(" + str(dimension) + ")",
                    "Py = Vj(" + str(dimension) + ")"
                ],
                       reduction_op='Sum',
                       axis=1,
                       cuda_type=cuda_type))
Example #4
    def _compute_reduction_keops(self, points, P, k):
        if k == 0:
            kernel_formula = "S*Exp(-S*SqNorm2(x - y)/IntCst(2))*(x - y)"
            formula = "TensorDot({kernel_formula}, p, Ind({dim}), Ind({dim}, {dim}), Ind(0), Ind(1))".format(
                kernel_formula=kernel_formula, dim=self.dim)
            alias = [
                "x=Vi(" + str(self.dim) + ")", "y=Vj(" + str(self.dim) + ")",
                "p=Vj(" + str(self.dim * self.dim) + ")", "S=Pm(1)"
            ]
            reduction = Genred(formula,
                               alias,
                               reduction_op='Sum',
                               axis=1,
                               dtype=str(points.dtype).split(".")[1])
            return reduction(points,
                             self.support,
                             P.reshape(-1, self.dim * self.dim),
                             self._keops_sigma,
                             backend=self._keops_backend).reshape(
                                 -1, self.dim)

        if k == 1:
            kernel_formula = "-S*Exp(-S*SqNorm2(x - y)/IntCst(2))*(S*TensorDot(x-y, x-y, Ind({dim}), Ind({dim}), Ind(), Ind())-eye)".format(
                dim=self.dim)
            formula = "TensorDot({kernel_formula}, p, Ind({dim}, {dim}), Ind({dim}, {dim}), Ind(1),Ind(1))".format(
                kernel_formula=kernel_formula, dim=self.dim)
            alias = [
                "x=Vi(" + str(self.dim) + ")", "y=Vj(" + str(self.dim) + ")",
                "p=Vj(" + str(self.dim * self.dim) + ")",
                "eye=Pm(" + str(self.dim * self.dim) + ")", "S=Pm(1)"
            ]
            reduction = Genred(formula,
                               alias,
                               reduction_op='Sum',
                               axis=1,
                               dtype=str(points.dtype).split(".")[1])
            return reduction(points,
                             self.support,
                             P.reshape(-1, self.dim * self.dim),
                             torch.eye(self.dim,
                                       dtype=self.support.dtype,
                                       device=self.device).flatten(),
                             self._keops_sigma,
                             backend=self._keops_backend).reshape(
                                 -1, self.dim,
                                 self.dim).transpose(1, 2).contiguous()

        else:
            raise RuntimeError(
                "StructuredField_pm.__call__(): KeOps computation not supported for order k = "
                + str(k))
Example #5
    def __init__(self, manifold, sigma, label):
        super().__init__(manifold, sigma, label)

        self.__keops_dtype = str(manifold.gd.dtype).split(".")[1]
        self.__keops_backend = 'CPU'
        if str(self.device) != 'cpu':
            self.__keops_backend = 'GPU'

        self.__keops_invsigmasq = torch.tensor([1. / sigma / sigma],
                                               dtype=manifold.gd.dtype,
                                               device=manifold.device)

        formula_cost = "(Exp(-S*SqNorm2(x - y)/IntCst(2))*px | py)/IntCst(2)"
        alias_cost = [
            "x=Vi(" + str(self.dim) + ")", "y=Vj(" + str(self.dim) + ")",
            "px=Vi(" + str(self.dim) + ")", "py=Vj(" + str(self.dim) + ")",
            "S=Pm(1)"
        ]
        self.reduction_cost = Genred(formula_cost,
                                     alias_cost,
                                     reduction_op='Sum',
                                     axis=0,
                                     dtype=self.__keops_dtype)

        formula_cgc = "Exp(-S*SqNorm2(x - y)/IntCst(2))*X"
        alias_cgc = [
            "x=Vi(" + str(self.dim) + ")", "y=Vj(" + str(self.dim) + ")",
            "X=Vj(" + str(self.dim) + ")", "S=Pm(1)"
        ]
        self.solve_cgc = KernelSolve(formula_cgc,
                                     alias_cgc,
                                     "X",
                                     axis=1,
                                     dtype=self.__keops_dtype)
Example #6
    def varifold_scalar_product(self, x, y, lengths_x, lengths_y, normalized_x,
                                normalized_y, sigma):
        if self.__K is None:
            formula = "Exp(-S*SqNorm2(x - y)/IntCst(2)) * Square((u|v))*p"
            alias = [
                "x=Vi(3)", "y=Vj(3)", "u=Vi(3)", "v=Vj(3)", "p=Vj(1)",
                "S=Pm(1)"
            ]

            self.__K = Genred(formula,
                              alias,
                              reduction_op='Sum',
                              axis=1,
                              dtype=str(x.dtype).split('.')[1])
            self.__keops_backend = 'CPU'
            if str(x.device) != 'cpu':
                self.__keops_backend = 'GPU'

        if sigma not in self.__oos2:
            self.__oos2[sigma] = torch.tensor([1. / sigma / sigma],
                                              device=x.device,
                                              dtype=x.dtype)

        return (lengths_x * self.__K(x,
                                     y,
                                     normalized_x,
                                     normalized_y,
                                     lengths_y,
                                     self.__oos2[sigma],
                                     backend=self.__keops_backend)).sum()
Example #7
    def test_generic_syntax_simple(self):
        ############################################################
        from pykeops.torch import Genred

        aliases = [
            "P = Pm(2)",  # 1st argument,  a parameter, dim 2.
            "X = Vi("
            + str(self.xc.shape[1])
            + ") ",  # 2nd argument, indexed by i, dim D.
            "Y = Vj(" + str(self.yc.shape[1]) + ") ",
        ]  # 3rd argument, indexed by j, dim D.

        formula = "Pow((X|Y),2) * ((Elem(P,0) * X) + (Elem(P,1) * Y))"

        if pykeops.config.gpu_available:
            backend_to_test = ["auto", "GPU_1D", "GPU_2D", "GPU"]
        else:
            backend_to_test = ["auto"]

        for b in backend_to_test:
            with self.subTest(b=b):
                my_routine = Genred(formula, aliases, reduction_op="Sum", axis=1)
                gamma_keops = my_routine(self.pc, self.xc, self.yc, backend=b)

                # Numpy version
                scals = (self.x @ self.y.T) ** 2  # Memory-intensive computation!
                gamma_py = self.p[0] * scals.sum(1).reshape(-1, 1) * self.x + self.p[
                    1
                ] * (scals @ self.y)

                # compare output
                self.assertTrue(
                    np.allclose(gamma_keops.cpu().data.numpy(), gamma_py, atol=1e-6)
                )
Example #8
    def test_heterogeneous_var_aliases(self):
        ############################################################
        from pykeops.torch import Genred
        from pykeops.numpy.utils import squared_distances

        aliases = ['p=Pm(0,1)', 'x=Vi(1,3)', 'y=Vj(2,3)']
        formula = 'Square(p-Var(3,1,1))*Exp(-SqNorm2(y-x))'

        # Call cuda kernel
        myconv = Genred(formula,
                        aliases,
                        reduction_op='Sum',
                        axis=1,
                        dtype='float32')
        gamma_keops = myconv(self.sigmac,
                             self.xc,
                             self.yc,
                             self.gc,
                             backend='auto')

        # Numpy version
        gamma_py = np.sum((self.sigma - self.g.T)**2 *
                          np.exp(-squared_distances(self.x, self.y)),
                          axis=1)

        # compare output
        self.assertTrue(
            np.allclose(gamma_keops.cpu().data.numpy().ravel(),
                        gamma_py.ravel(),
                        atol=1e-6))
Example #9
def generic_logsumexp(formula, output, *aliases, **kwargs):
    r"""Alias for :class:`torch.Genred <pykeops.torch.Genred>` with a "LogSumExp" reduction.

    Args:
        formula (string): Scalar-valued symbolic KeOps expression, as in :class:`torch.Genred <pykeops.torch.Genred>`.
        output (string): An identifier of the form ``"AL = TYPE(1)"`` 
            that specifies the category and dimension of the output variable. Here:

              - ``AL`` is a dummy alphanumerical name.
              - ``TYPE`` is a *category*. One of:

                - ``Vi``: indexation by :math:`i` along axis 0; reduction is performed along axis 1.
                - ``Vj``: indexation by :math:`j` along axis 1; reduction is performed along axis 0.

        *aliases (strings): List of identifiers, as in :class:`torch.Genred <pykeops.torch.Genred>`.

    Keyword Args:
        dtype (string, default = ``"float32"``): Specifies the numerical **dtype** of the input and output arrays. 
            The supported values are:

              - **dtype** = ``"float16"`` or ``"half"``.
              - **dtype** = ``"float32"`` or ``"float"``.
              - **dtype** = ``"float64"`` or ``"double"``.

    Returns:
        A generic reduction that can be called on arbitrary
        Torch tensors, as documented in :class:`torch.Genred <pykeops.torch.Genred>`.

    Example:
        Log-likelihood of a Gaussian Mixture Model,

        .. math::
            a_i~=~f(x_i)~&=~ \log \sum_{j=1}^{N} \exp(-\gamma\cdot\|x_i-y_j\|^2)\cdot b_j \\\\
               ~&=~ \log \sum_{j=1}^{N} \exp\big(-\gamma\cdot\|x_i-y_j\|^2 \,+\, \log(b_j) \big).

        >>> log_likelihood = generic_logsumexp(
        ...     '(-(g * SqNorm2(x - y))) + b', # Formula
        ...     'a = Vi(1)',              # Output: 1 scalar per line
        ...     'x = Vi(3)',              # 1st input: dim-3 vector per line
        ...     'y = Vj(3)',              # 2nd input: dim-3 vector per line
        ...     'g = Pm(1)',              # 3rd input: vector of size 1
        ...     'b = Vj(1)')              # 4th input: 1 scalar per line
        >>> x = torch.randn(1000000, 3, requires_grad=True).cuda()
        >>> y = torch.randn(2000000, 3).cuda()
        >>> g = torch.Tensor([.5]).cuda()      # Parameter of our GMM
        >>> b = torch.rand(2000000, 1).cuda()  # Positive weights...
        >>> b = b / b.sum()                    # Normalized to get a probability measure
        >>> a = log_likelihood(x, y, g, b.log())  # a_i = log sum_j exp(-g*|x_i-y_j|^2) * b_j
        >>> print(a.shape)
        torch.Size([1000000, 1])
    """
    _, cat, _, _ = get_type(output)
    axis = cat2axis(cat)
    return Genred(formula,
                  aliases,
                  reduction_op='LogSumExp',
                  axis=axis,
                  **kwargs)
Example #10
def generic_argkmin(formula, output, *aliases, **kwargs):
    r"""Alias for :class:`torch.Genred <pykeops.torch.Genred>` with an "ArgKMin" reduction.

    Args:
        formula (string): Scalar-valued symbolic KeOps expression, as in :class:`torch.Genred <pykeops.torch.Genred>`.
        output (string): An identifier of the form ``"AL = TYPE(K)"`` 
            that specifies the category and dimension of the output variable. Here:

              - ``AL`` is a dummy alphanumerical name.
              - ``TYPE`` is a *category*. One of:

                - ``Vi``: indexation by :math:`i` along axis 0; reduction is performed along axis 1.
                - ``Vj``: indexation by :math:`j` along axis 1; reduction is performed along axis 0.

              - ``K`` is an integer, the number of values to extract.

        *aliases (strings): List of identifiers, as in :class:`torch.Genred <pykeops.torch.Genred>`.

    Keyword Args:
        dtype (string, default = ``"float32"``): Specifies the numerical **dtype** of the input and output arrays. 
            The supported values are:

              - **dtype** = ``"float16"`` or ``"half"``.
              - **dtype** = ``"float32"`` or ``"float"``.
              - **dtype** = ``"float64"`` or ``"double"``.

    Returns:
        A generic reduction that can be called on arbitrary
        Torch tensors, as documented in :class:`torch.Genred <pykeops.torch.Genred>`.

    Example:
        Bruteforce K-nearest neighbors search in dimension 100:

        >>> knn = generic_argkmin(
        ...     'SqDist(x, y)',   # Formula
        ...     'a = Vi(3)',      # Output: 3 scalars per line
        ...     'x = Vi(100)',    # 1st input: dim-100 vector per line
        ...     'y = Vj(100)')    # 2nd input: dim-100 vector per line
        >>> x = torch.randn(5,     100)
        >>> y = torch.randn(20000, 100)
        >>> a = knn(x, y)
        >>> print(a)
        tensor([[ 9054., 11653., 11614.],
                [13466., 11903., 14180.],
                [14164.,  8809.,  3799.],
                [ 2092.,  3323., 18479.],
                [14433., 11315., 11841.]])
        >>> print( (x - y[ a[:,0].long() ]).norm(dim=1) )  # Distance to the nearest neighbor
        tensor([10.7933, 10.3235, 10.1218, 11.4919, 10.5100])
        >>> print( (x - y[ a[:,1].long() ]).norm(dim=1) )  # Distance to the second neighbor
        tensor([11.3702, 10.6550, 10.7646, 11.5676, 11.1356])
        >>> print( (x - y[ a[:,2].long() ]).norm(dim=1) )  # Distance to the third neighbor
        tensor([11.3820, 10.6725, 10.8510, 11.6071, 11.1968])
    """
    _, cat, k, _ = get_type(output)
    axis = cat2axis(cat)
    return Genred(formula, aliases, reduction_op='ArgKMin', axis=axis, opt_arg=k, **kwargs)
Example #11
    def _compute_reduction_keops(self, points, k):
        if k == 0:
            kernel_formula = "Exp(-S*SqNorm2(x - y)/IntCst(2))"
            formula = kernel_formula + "*p"
            alias = [
                "x=Vi(" + str(self.dim) + ")", "y=Vj(" + str(self.dim) + ")",
                "p=Vj(" + str(self.dim) + ")", "S=Pm(1)"
            ]
            reduction = Genred(formula,
                               alias,
                               reduction_op='Sum',
                               axis=1,
                               dtype=self._keops_dtype)
            return reduction(points,
                             self.support,
                             self.moments,
                             self._keops_sigma,
                             backend=self._keops_backend).reshape(
                                 -1, self.dim)

        if k == 1:
            kernel_formula = "-S*Exp(-S*SqNorm2(x - y)/IntCst(2))*(x - y)"
            formula = "TensorProd(" + kernel_formula + ", p)"
            alias = [
                "x=Vi(" + str(self.dim) + ")", "y=Vj(" + str(self.dim) + ")",
                "p=Vj(" + str(self.dim) + ")", "S=Pm(1)"
            ]
            reduction = Genred(formula,
                               alias,
                               reduction_op='Sum',
                               axis=1,
                               dtype=self._keops_dtype)
            return reduction(points,
                             self.support,
                             self.moments,
                             self._keops_sigma,
                             backend=self._keops_backend).reshape(
                                 -1, self.dim,
                                 self.dim).transpose(1, 2).contiguous()

        else:
            raise RuntimeError(
                "StructuredField_0.__call__(): KeOps computation not supported for order k = "
                + str(k))
Example #12
def K(x, y, u, v, f, g, weight_y):
    d = x.shape[1]
    pK = Genred(expr_geom + '*' + expr_grass + '*' + expr_fun + '*r', [
        'a=Pm(1)', 'b=Pm(1)', 'c=Pm(1)', 'x=Vi(' + str(d) + ')',
        'y=Vj(' + str(d) + ')', 'u=Vi(' + str(d) + ')',
        'v=Vj(' + str(d) + ')', 'g=Vi(1)', 'h=Vj(1)', 'r=Vj(1)'
    ],
                reduction_op='Sum',
                axis=1)
    return pK(1 / sig_geom**2, 1 / sig_grass**2, 1 / sig_fun**2, x, y, u,
              v, f, g, weight_y)
Example #13
def generic_argmin(formula, output, *aliases, **kwargs):
    r"""Alias for :class:`torch.Genred <pykeops.torch.Genred>` with an "ArgMin" reduction.

    Args:
        formula (string): Scalar-valued symbolic KeOps expression, as in :class:`torch.Genred <pykeops.torch.Genred>`.
        output (string): An identifier of the form ``"AL = TYPE(1)"`` 
            that specifies the category and dimension of the output variable. Here:

              - ``AL`` is a dummy alphanumerical name.
              - ``TYPE`` is a *category*. One of:

                - ``Vi``: indexation by :math:`i` along axis 0; reduction is performed along axis 1.
                - ``Vj``: indexation by :math:`j` along axis 1; reduction is performed along axis 0.

        *aliases (strings): List of identifiers, as in :class:`torch.Genred <pykeops.torch.Genred>`.

    Keyword Args:
        dtype (string, default = ``"float32"``): Specifies the numerical **dtype** of the input and output arrays. 
            The supported values are:

              - **dtype** = ``"float16"`` or ``"float"``.
              - **dtype** = ``"float32"`` or ``"float"``.
              - **dtype** = ``"float64"`` or ``"double"``.

    Returns:
        A generic reduction that can be called on arbitrary
        Torch tensors, as documented in :class:`torch.Genred <pykeops.torch.Genred>`.

    Example:
        Bruteforce nearest neighbor search in dimension 100:

        >>> nearest_neighbor = generic_argmin(
        ...     'SqDist(x, y)',   # Formula
        ...     'a = Vi(1)',      # Output: 1 scalar per line
        ...     'x = Vi(100)',    # 1st input: dim-100 vector per line
        ...     'y = Vj(100)')    # 2nd input: dim-100 vector per line
        >>> x = torch.randn(5,     100)
        >>> y = torch.randn(20000, 100)
        >>> a = nearest_neighbor(x, y)
        >>> print(a)
        tensor([[ 8761.],
                [ 2836.],
                [  906.],
                [16130.],
                [ 3158.]])
        >>> dists = (x - y[ a.view(-1).long() ] ).norm(dim=1)  # Distance to the nearest neighbor
        >>> print(dists)
        tensor([10.5926, 10.9132,  9.9694, 10.1396, 10.1955])
    """
    _, cat, _, _ = get_type(output)
    axis = cat2axis(cat)
    return Genred(formula, aliases, reduction_op='ArgMin', axis=axis, **kwargs)
Example #14
    def log_prob(self, value):
        if self.use_pykeops:
            formula = "wj+Log(Step(xi-gj-IntCst(1)))+(ai-IntCst(1))*Log(IfElse(xi-gj-IntCst(1),xi-gj,xi))-bi*(xi-gj)"
            variables = [
                "wj = Vj(1)",
                "gj = Vj(1)",
                "ai = Vi(1)",
                "bi = Vi(1)",
                "xi = Vi(1)",
            ]
            dtype = self.concentration.dtype
            my_routine = Genred(
                formula,
                variables,
                reduction_op="LogSumExp",
                axis=1,
                dtype=str(dtype).split(".")[1],
            )
            concentration, value, rate = torch.broadcast_tensors(
                self.concentration, value, self.rate
            )
            shape = value.shape
            result = my_routine(
                self.offset_logits.reshape(-1, 1),
                self.offset_samples.reshape(-1, 1).to(dtype),
                concentration.reshape(-1, 1),
                rate.reshape(-1, 1).contiguous(),
                value.reshape(-1, 1).to(dtype),
                backend=self.device_pykeops,
            )
            result = result.reshape(shape)
            result = (
                self.concentration * torch.log(self.rate)
                - torch.lgamma(self.concentration)
                + result
            )
        else:
            value = torch.as_tensor(value).unsqueeze(-1)
            concentration = self.concentration.unsqueeze(-1)
            mask = value > self.offset_samples
            new_value = torch.where(
                mask, value - self.offset_samples, value.new_ones(())
            )
            obs_logits = (
                concentration * torch.log(self.rate)
                + (concentration - 1) * torch.log(new_value)
                - self.rate * new_value
                - torch.lgamma(concentration)
            )
            result = obs_logits + self.offset_logits + torch.log(mask)
            result = torch.logsumexp(result, -1)
        return result.sum((-2, -1))
Example #15
    def test_generic_syntax_softmax(self):
        ############################################################
        from pykeops.torch import Genred

        aliases = ["p=Pm(1)", "a=Vj(1)", "x=Vi(3)", "y=Vj(3)"]
        formula = "Square(p-a)*Exp(-SqNorm2(x-y))"
        formula_weights = "y"
        if pykeops.config.gpu_available:
            backend_to_test = ["auto", "GPU_1D", "GPU_2D", "GPU"]
        else:
            backend_to_test = ["auto"]

        for b in backend_to_test:
            with self.subTest(b=b):
                # Call cuda kernel
                myop = Genred(
                    formula,
                    aliases,
                    reduction_op="SumSoftMaxWeight",
                    axis=1,
                    dtype="float64",
                    formula2=formula_weights,
                )
                gamma_keops = myop(self.sigmacd,
                                   self.gcd,
                                   self.xcd,
                                   self.ycd,
                                   backend=b)

                # Numpy version
                def np_softmax(x, w):
                    x -= np.max(
                        x, axis=1)[:, None]  # subtract the max for robustness
                    return np.exp(x) @ w / np.sum(np.exp(x), axis=1)[:, None]

                gamma_py = np_softmax(
                    (self.sigma - self.g.T)**2 *
                    np.exp(-squared_distances(self.x, self.y)),
                    self.y,
                )

                # compare output
                self.assertTrue(
                    np.allclose(gamma_keops.cpu().data.numpy(),
                                gamma_py,
                                atol=1e-6))
Example #16
def generic_sum(formula, output, *aliases, **kwargs):
    r"""Alias for :class:`torch.Genred <pykeops.torch.Genred>` with a "Sum" reduction.

    Args:
        formula (string): Symbolic KeOps expression, as in :class:`torch.Genred <pykeops.torch.Genred>`.
        output (string): An identifier of the form ``"AL = TYPE(DIM)"`` 
            that specifies the category and dimension of the output variable. Here:

              - ``AL`` is a dummy alphanumerical name.
              - ``TYPE`` is a *category*. One of:

                - ``Vi``: indexation by :math:`i` along axis 0; reduction is performed along axis 1.
                - ``Vj``: indexation by :math:`j` along axis 1; reduction is performed along axis 0.

              - ``DIM`` is an integer, the dimension of the output variable; it should be compatible with **formula**.
        *aliases (strings): List of identifiers, as in :class:`torch.Genred <pykeops.torch.Genred>`.

    Keyword Args:
        dtype (string, default = ``"float32"``): Specifies the numerical **dtype** of the input and output arrays. 
            The supported values are:

              - **dtype** = ``"float16"`` or ``"half"``.
              - **dtype** = ``"float32"`` or ``"float"``.
              - **dtype** = ``"float64"`` or ``"double"``.

    Returns:
        A generic reduction that can be called on arbitrary
        Torch tensors, as documented in :class:`torch.Genred <pykeops.torch.Genred>`.

    Example:
        >>> my_conv = generic_sum(       # Custom Kernel Density Estimator
        ...     'Exp(-SqNorm2(x - y))',  # Formula
        ...     'a = Vi(1)',             # Output: 1 scalar per line
        ...     'x = Vi(3)',             # 1st input: dim-3 vector per line
        ...     'y = Vj(3)')             # 2nd input: dim-3 vector per line
        >>> # Apply it to 2d arrays x and y with 3 columns and a (huge) number of lines
        >>> x = torch.randn(1000000, 3, requires_grad=True).cuda()
        >>> y = torch.randn(2000000, 3).cuda()
        >>> a = my_conv(x, y)  # a_i = sum_j exp(-|x_i-y_j|^2)
        >>> print(a.shape)
        torch.Size([1000000, 1])
    """
    _, cat, _, _ = get_type(output)
    axis = cat2axis(cat)
    return Genred(formula, aliases, reduction_op='Sum', axis=axis, **kwargs)
Example #17
    def test_pickle(self):
        ############################################################
        from pykeops.torch import Genred
        import pickle

        formula = "SqDist(x,y)"
        aliases = [
            "x = Vi(" + str(self.D) + ")",  # First arg   : i-variable, of size D
            "y = Vj(" + str(self.D) + ")",  # Second arg  : j-variable, of size D
        ]

        kernel_instance = Genred(formula, aliases, reduction_op="Sum", axis=1)

        # serialize/pickle
        serialized_kernel = pickle.dumps(kernel_instance)
        # deserialize/unpickle
        deserialized_kernel = pickle.loads(serialized_kernel)

        self.assertEqual(type(kernel_instance), type(deserialized_kernel))
Example #18
    def test_non_contiguity(self):
        ############################################################
        from pykeops.torch import Genred

        aliases = [
            "P = Pm(2)",  # 1st argument,  a parameter, dim 2.
            "X = Vi(" + str(self.xc.shape[1]) +
            ") ",  # 2nd argument, indexed by i, dim D.
            "Y = Vj(" + str(self.yc.shape[1]) + ") ",
        ]  # 3rd argument, indexed by j, dim D.

        formula = "Pow((X|Y),2) * ((Elem(P,0) * X) + (Elem(P,1) * Y))"

        my_routine = Genred(formula, aliases, reduction_op="Sum", axis=1)
        yc_tmp = self.yc.t().contiguous().t()  # create a non contiguous copy

        # check output
        self.assertFalse(yc_tmp.is_contiguous())
        my_routine(self.pc, self.xc, yc_tmp, backend="auto")
Example #19
    def test_non_contiguity(self):
        ############################################################
        from pykeops.torch import Genred

        aliases = [
            'P = Pm(2)',  # 1st argument,  a parameter, dim 2.
            'X = Vi(' + str(self.xc.shape[1]) +
            ') ',  # 2nd argument, indexed by i, dim D.
            'Y = Vj(' + str(self.yc.shape[1]) + ') '
        ]  # 3rd argument, indexed by j, dim D.

        formula = 'Pow((X|Y),2) * ((Elem(P,0) * X) + (Elem(P,1) * Y))'

        my_routine = Genred(formula, aliases, reduction_op='Sum', axis=1)
        yc_tmp = self.yc.t().contiguous().t()  # create a non contiguous copy

        # check output
        self.assertFalse(yc_tmp.is_contiguous())
        with self.assertRaises(RuntimeError):
            my_routine(self.pc, self.xc, yc_tmp, backend='auto')
Example #20
def WarmUpGpu():
    # dummy first calls for accurate timing in case of GPU use
    print("Warming up the Gpu (torch bindings) !!!")
    if torch.cuda.is_available():
        formula = 'Exp(-oos2*SqDist(x,y))*b'
        aliases = [
            'x = Vi(1)',  # First arg   : i-variable, of size 1
            'y = Vj(1)',  # Second arg  : j-variable, of size 1
            'b = Vj(1)',  # Third arg  : j-variable, of size 1
            'oos2 = Pm(1)'
        ]  # Fourth arg  : scalar parameter
        my_routine = Genred(formula,
                            aliases,
                            reduction_op='Sum',
                            axis=1,
                            dtype='float32')
        dum = torch.rand(10, 1)
        dum2 = torch.rand(10, 1)
        my_routine(dum, dum, dum2, torch.tensor([1.0]))
        my_routine(dum, dum, dum2, torch.tensor([1.0]))
Example #21
    def test_logSumExp_gradient_kernels_feature(self):
        ############################################################
        import torch
        from pykeops.torch import Genred

        aliases = [
            'P = Pm(2)',  # 1st argument,  a parameter, dim 2.
            'X = Vi(' + str(self.gc.shape[1]) +
            ') ',  # 2nd argument, indexed by i, dim D.
            'Y = Vj(' + str(self.fc.shape[1]) + ') '
        ]  # 3rd argument, indexed by j, dim D.

        formula = '(Elem(P,0) * X) + (Elem(P,1) * Y)'

        # Pytorch version
        my_routine = Genred(formula, aliases, reduction_op='LogSumExp', axis=1)
        tmp = my_routine(self.pc, self.fc, self.gc, backend='auto')
        res = torch.dot(
            torch.ones_like(tmp).view(-1),
            tmp.view(-1))  # equivalent to tmp.sum() but avoiding contiguity pb
        gamma_keops = torch.autograd.grad(res, [self.fc, self.gc],
                                          create_graph=False)

        # Numpy version
        tmp = self.p[0] * self.f + self.p[1] * self.g.T
        res_py = (np.exp(tmp)).sum(axis=1)
        tmp2 = np.exp(tmp.T) / res_py.reshape(1, -1)
        gamma_py = [
            np.ones(self.M) * self.p[0], self.p[1] * tmp2.T.sum(axis=0)
        ]

        # compare output
        self.assertTrue(
            np.allclose(gamma_keops[0].cpu().data.numpy().ravel(),
                        gamma_py[0],
                        atol=1e-6))
        self.assertTrue(
            np.allclose(gamma_keops[1].cpu().data.numpy().ravel(),
                        gamma_py[1],
                        atol=1e-6))
Example #22
    def test_softmax(self):
        ############################################################

        import torch
        from pykeops.torch import Genred

        formula = "SqDist(x,y)"
        formula_weights = "b"
        aliases = [
            "x = Vi(" + str(self.D) +
            ")",  # First arg   : i-variable, of size D
            "y = Vj(" + str(self.D) +
            ")",  # Second arg  : j-variable, of size D
            "b = Vj(" + str(self.E) + ")",
        ]  # third arg : j-variable, of size Dv

        softmax_op = Genred(
            formula,
            aliases,
            reduction_op="SumSoftMaxWeight",
            axis=1,
            formula2=formula_weights,
        )

        c = softmax_op(self.xc, self.yc, self.bc)

        # compare with direct implementation
        cc = 0
        for k in range(self.D):
            xk = self.xc[:, k][:, None]
            yk = self.yc[:, k][:, None]
            cc += (xk - yk.t())**2
        cc -= torch.max(cc, dim=1)[0][:,
                                      None]  # subtract the max for robustness
        cc = torch.exp(cc) @ self.bc / torch.sum(torch.exp(cc), dim=1)[:, None]

        self.assertTrue(
            np.allclose(c.cpu().data.numpy().ravel(),
                        cc.cpu().data.numpy().ravel(),
                        atol=1e-6))
Example #23
def FeaturesKP(kernel,
               gs,
               xs,
               ys,
               bs,
               mode='sum',
               backend='auto',
               dtype='float32'):
    if backend in ['pytorch', 'matrix']:
        domain, torch_map = pytorch_routines[mode]
        if domain == 'sum':
            routine = kernel.routine_sum
        elif domain == 'log':
            routine = kernel.routine_log

        return torch_map(routine, gs, xs, ys, bs, matrix=(backend == 'matrix'))

    else:
        red, formula, bs_cat = keops_routines[mode]

        formula = formula.format(f_sum=kernel.formula_sum,
                                 f_log=kernel.formula_log)

        # Given the output sizes, we must generate the appropriate list of aliases

        # We will store the arguments as follows:
        # [ G_0, G_1, ..., X_0, X_1, Y_0, Y_1, ...]
        full_args, aliases, index = [], [], 0  # tensor list, string list, current input arg

        # First, the G_i's
        for (i, g) in enumerate(gs):
            if g is not None:
                g_var, g_dim, g_cat, g_str = extract_metric_parameters(
                    g)  # example : Tensor(...), 3, 0, 'Vi'
                aliases.append('G_{g_ind} = {g_str}({index}, {g_dim})'.format(
                    g_ind=i, g_str=g_str, index=index, g_dim=g_dim))
                full_args.append(g_var)
                index += 1

        # Then, the X_i's
        for (i, x) in enumerate(xs):
            x_dim = x.size(1)
            aliases.append('X_{x_ind} = Vi({index}, {x_dim})'.format(
                x_ind=i, index=index, x_dim=x_dim))
            full_args.append(x)
            index += 1

        # Then, the Y_j's
        for (j, y) in enumerate(ys):
            y_dim = y.size(1)
            aliases.append('Y_{y_ind} = Vj({index}, {y_dim})'.format(
                y_ind=j, index=index, y_dim=y_dim))
            full_args.append(y)
            index += 1

        if len(xs) != len(ys):
            raise ValueError(
                "Kernel_product works with pairs of variables. The 'x'-list of features should thus have the same length as the 'y' one."
            )

        # Then, the B_i/j's
        for (i, (b, b_cat)) in enumerate(zip(bs, bs_cat)):
            b_dim = b.size(1)
            b_str = ['Vi', 'Vj', 'Pm'][b_cat]
            aliases.append('B_{b_ind} = {b_str}({index}, {b_dim})'.format(
                b_ind=i, b_str=b_str, index=index, b_dim=b_dim))
            full_args.append(b)
            index += 1

        axis = 1  # the output vector is indexed by 'i' (CAT=0)
        genconv = Genred(formula,
                         aliases,
                         reduction_op=red,
                         axis=axis,
                         dtype=dtype)

        return genconv(*full_args, backend=backend)
Example #24
# .. math::
#
#   a_i = \sum_{j=1}^M (\langle x_i,y_j \rangle^2) (p_0 x_i + p_1 y_j)
#
# where the two real parameters are stored in a 2-vector :math:`p=(p_0,p_1)`.

# KeOps implementation.
# Note that Square(...) is more efficient than Pow(..., 2).
formula = "Square((X|Y)) * ((Elem(P, 0) * X) + (Elem(P, 1) * Y))"
variables = [
    "P = Pm(2)",  # 1st argument,  a parameter, dim 2.
    "X = Vi(3)",  # 2nd argument, indexed by i, dim D.
    "Y = Vj(3)",
]  # 3rd argument, indexed by j, dim D.

my_routine = Genred(formula, variables, reduction_op="Sum", axis=1)
a_keops = my_routine(p, x, y)

# Vanilla PyTorch implementation
scals = (torch.mm(x, y.t()))**2  # Memory-intensive computation!
a_pytorch = p[0] * scals.sum(1).view(-1, 1) * x + p[1] * (torch.mm(scals, y))

# Plot the results next to each other:
for i in range(D):
    plt.subplot(D, 1, i + 1)
    plt.plot(a_keops.detach().cpu().numpy()[:40, i], "-", label="KeOps")
    plt.plot(a_pytorch.detach().cpu().numpy()[:40, i], "--", label="PyTorch")
    plt.legend(loc="lower right")
plt.tight_layout()
plt.show()
Example #25
formula = "Square(p-a)*Exp(x+y)"
formula2 = "b"
variables = [
    "x = Vi(1)",  # First arg   : i-variable, of size 1 (scalar)
    "y = Vj(1)",  # Second arg  : j-variable, of size 1 (scalar)
    "a = Vj(1)",  # Third arg   : j-variable, of size 1 (scalar)
    "p = Pm(1)",  # Fourth arg  : Parameter,  of size 1 (scalar)
    "b = Vj(3)",
]  # Fifth arg   : j-variable, of size 3 (vector)

start = time.time()

####################################################################
# Our log-sum-exp reduction is performed over the index :math:`j`,
# i.e. on the axis ``1`` of the kernel matrix.
# The output c is an :math:`x`-variable indexed by :math:`i`.

my_routine = Genred(formula,
                    variables,
                    reduction_op="LogSumExp",
                    axis=1,
                    dtype=dtype,
                    formula2=formula2)
c = my_routine(x, y, a, p, b, backend="CPU")

# N.B.: By specifying backend='CPU', we can make sure that the result is computed using a simple C++ for loop.
print(
    "Time to compute the convolution operation on the cpu: ",
    round(time.time() - start, 5),
    "s",
    end=" ",
)

#######################################################################
# We compare with the unstable, naive computation "Log of Sum of Exp":
Example #26
# ---------------
#
# Create a new generic routine using the :class:`pykeops.numpy.Genred`
# constructor:

formula = 'SqDist(x,y)'
formula_weights = 'b'
aliases = [
    'x = Vi(' + str(D) + ')',  # First arg:  i-variable of size D
    'y = Vj(' + str(D) + ')',  # Second arg: j-variable of size D
    'b = Vj(' + str(Dv) + ')'
]  # Third arg:  j-variable of size Dv

softmax_op = Genred(formula,
                    aliases,
                    reduction_op='SumSoftMaxWeight',
                    axis=1,
                    formula2=formula_weights)

# Dummy first call to warmup the GPU and get accurate timings:
_ = softmax_op(x, y, b)

###############################################################################
# Use our new function on arbitrary Numpy arrays:
#

start = time.time()
c = softmax_op(x, y, b)
print("Timing (KeOps implementation): ", round(time.time() - start, 5), "s")

# compare with direct implementation
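# The direct check is cut off in this snippet; a minimal sketch of it,
# adapted from the PyTorch version in Example #22 (an assumption: x, y, b
# are the NumPy arrays fed to softmax_op above, and numpy is imported as np):
cc = np.sum((x[:, None, :] - y[None, :, :]) ** 2, axis=2)  # pairwise squared distances
cc -= np.max(cc, axis=1)[:, None]  # subtract the row-wise max for robustness
cc = np.exp(cc) @ b / np.sum(np.exp(cc), axis=1)[:, None]  # softmax-weighted sum
print("Relative error: ", np.linalg.norm(c - cc) / np.linalg.norm(cc))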
Example #27
def run_keops_mmv(X1: torch.Tensor,
                  X2: torch.Tensor,
                  v: torch.Tensor,
                  other_vars: List[torch.Tensor],
                  out: Optional[torch.Tensor],
                  formula: str,
                  aliases: List[str],
                  axis: int,
                  reduction: str = 'Sum',
                  opt: Optional[FalkonOptions] = None) -> torch.Tensor:
    if opt is None:
        opt = FalkonOptions()
    # Choose backend
    N, D = X1.shape
    T = v.shape[1]
    backend = _decide_backend(opt, D)
    dtype = _keops_dtype(X1.dtype)
    device = X1.device

    if not check_same_device(X1, X2, v, out, *other_vars):
        raise RuntimeError("All input tensors must be on the same device.")
    if (device.type == 'cuda') and (not backend.startswith("GPU")):
        warnings.warn(
            "KeOps backend was chosen to be CPU, but GPU input tensors found. "
            "Defaulting to 'GPU_1D' backend. To force usage of the CPU backend, "
            "please pass CPU tensors; to avoid this warning if the GPU backend is "
            "desired, check your options (i.e. set 'use_cpu=False').")
        backend = "GPU_1D"

    # Define formula wrapper
    fn = Genred(formula,
                aliases,
                reduction_op=reduction,
                axis=axis,
                dtype=dtype,
                dtype_acc=opt.keops_acc_dtype,
                sum_scheme=opt.keops_sum_scheme)

    # Create output matrix
    if out is None:
        # noinspection PyArgumentList
        out = torch.empty(N,
                          T,
                          dtype=X1.dtype,
                          device=device,
                          pin_memory=(backend != 'CPU')
                          and (device.type == 'cpu'))

    if backend.startswith("GPU") and device.type == 'cpu':
        # slack is high due to imprecise memory usage estimates for keops
        gpu_info = _get_gpu_info(opt, slack=opt.keops_memory_slack)
        block_sizes = calc_gpu_block_sizes(gpu_info, N)

        # Create queues
        args = []  # Arguments passed to each subprocess
        for i, g in enumerate(gpu_info):
            # First round of subdivision
            bwidth = block_sizes[i + 1] - block_sizes[i]
            if bwidth <= 0:
                continue
            args.append((ArgsFmmv(X1=X1.narrow(0, block_sizes[i], bwidth),
                                  X2=X2,
                                  v=v,
                                  out=out.narrow(0, block_sizes[i], bwidth),
                                  other_vars=other_vars,
                                  function=fn,
                                  backend=backend,
                                  gpu_ram=g.usable_ram), g.Id))
        _start_wait_processes(_single_gpu_method, args)
    else:  # Run on CPU or GPU with CUDA inputs
        variables = [X1, X2, v] + other_vars
        if device.type == 'cuda':
            with torch.cuda.device(device):
                sync_current_stream(device)
                out = fn(*variables, out=out, backend=backend)
        else:
            out = fn(*variables, out=out, backend=backend)

    return out
Example #28
formula = 'Square(p-a)*Exp(x+y)'
variables = [
    'x = Vi(3)',  # First arg   : i-variable, of size 3
    'y = Vj(3)',  # Second arg  : j-variable, of size 3
    'a = Vj(1)',  # Third arg   : j-variable, of size 1 (scalar)
    'p = Pm(1)'
]  # Fourth  arg : Parameter,  of size 1 (scalar)

####################################################################
# Our sum reduction is performed over the index :math:`j`,
# i.e. on the axis ``1`` of the kernel matrix.
# The output c is an :math:`x`-variable indexed by :math:`i`.

my_routine = Genred(formula,
                    variables,
                    reduction_op='Sum',
                    axis=1,
                    dtype=dtype)
c = my_routine(x, y, a, p)

####################################################################
# Compute the gradient
# --------------------
# Now, let's compute the gradient of :math:`c` with
# respect to :math:`y`. Since :math:`c` is not scalar valued,
# its "gradient" :math:`\partial c` should be understood as the adjoint of the
# differential operator, i.e. as the linear operator that:
#
# - takes as input a new tensor :math:`e` with the shape of :math:`c`
# - outputs a tensor :math:`g` with the shape of :math:`y`
#
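# A minimal sketch of that adjoint computation (an assumption: y was created
# with requires_grad=True earlier in this script, so autograd can reach it):

e = torch.rand_like(c)                              # a new tensor e with the shape of c
(g,) = torch.autograd.grad(c, [y], grad_outputs=e)  # g has the shape of y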
Example #29
def run_keops_mmv(X1: torch.Tensor,
                  X2: torch.Tensor,
                  v: torch.Tensor,
                  other_vars: List[torch.Tensor],
                  out: Optional[torch.Tensor],
                  formula: str,
                  aliases: List[str],
                  axis: int,
                  reduction: str = 'Sum',
                  opt: Optional[FalkonOptions] = None) -> torch.Tensor:
    if opt is None:
        opt = FalkonOptions()
    # Choose backend
    N, D = X1.shape
    T = v.shape[1]
    backend = _decide_backend(opt, D)
    dtype = _keops_dtype(X1.dtype)
    device = X1.device

    if not check_same_device(X1, X2, v, out, *other_vars):
        raise RuntimeError("All input tensors must be on the same device.")
    if (device.type == 'cuda') and (not backend.startswith("GPU")):
        warnings.warn("KeOps backend was chosen to be CPU, but GPU input tensors found. "
                      "Defaulting to 'GPU_1D' backend. To force usage of the CPU backend, "
                      "please pass CPU tensors; to avoid this warning if the GPU backend is "
                      "desired, check your options (i.e. set 'use_cpu=False').")
        backend = "GPU_1D"

    # Define formula wrapper
    fn = Genred(formula, aliases,
                reduction_op=reduction, axis=axis,
                dtype=dtype, dtype_acc=opt.keops_acc_dtype,
                sum_scheme=opt.keops_sum_scheme)

    # Compile on a small data subset
    small_data_variables = [X1[:100], X2[:10], v[:10]] + other_vars
    small_data_out = torch.empty((100, T), dtype=X1.dtype, device=device)
    fn(*small_data_variables, out=small_data_out, backend=backend)

    # Create output matrix
    if out is None:
        # noinspection PyArgumentList
        out = torch.empty(N, T, dtype=X1.dtype, device=device,
                          pin_memory=(backend != 'CPU') and (device.type == 'cpu'))

    if backend.startswith("GPU") and device.type == 'cpu':
        # Info about GPUs
        ram_slack = 0.7  # slack is high due to imprecise memory usage estimates
        gpu_info = [v for k, v in devices.get_device_info(opt).items() if k >= 0]
        gpu_ram = [
            min((g.free_memory - 300 * 2 ** 20) * ram_slack, opt.max_gpu_mem * ram_slack)
            for g in gpu_info
        ]
        block_sizes = calc_gpu_block_sizes(gpu_info, N)

        # Create queues
        args = []  # Arguments passed to each subprocess
        for i in range(len(gpu_info)):
            # First round of subdivision
            bwidth = block_sizes[i + 1] - block_sizes[i]
            if bwidth <= 0:
                continue

            args.append((ArgsFmmv(
                X1=X1.narrow(0, block_sizes[i], bwidth),
                X2=X2,
                v=v,
                out=out.narrow(0, block_sizes[i], bwidth),
                other_vars=other_vars,
                function=fn,
                backend=backend,
                gpu_ram=gpu_ram[i]
            ), gpu_info[i].Id))
        _start_wait_processes(_single_gpu_method, args)
    else:  # Run on CPU or GPU with CUDA inputs
        variables = [X1, X2, v] + other_vars
        out = fn(*variables, out=out, backend=backend)

    return out
Example #30
formula = 'Square(p-a)*Exp(x+y)'
formula2 = 'b'
variables = ['x = Vi(1)',  # First arg   : i-variable, of size 1 (scalar)
             'y = Vj(1)',  # Second arg  : j-variable, of size 1 (scalar)
             'a = Vj(1)',  # Third arg   : j-variable, of size 1 (scalar)
             'p = Pm(1)',  # Fourth arg  : Parameter,  of size 1 (scalar)
             'b = Vj(3)']  # Fifth arg   : j-variable, of size 3 (vector)
                      
start = time.time()

####################################################################
# Our log-sum-exp reduction is performed over the index :math:`j`,
# i.e. on the axis ``1`` of the kernel matrix.
# The output c is an :math:`x`-variable indexed by :math:`i`.

my_routine = Genred(formula, variables, reduction_op='LogSumExp', axis=1, dtype=dtype, formula2=formula2)
c = my_routine(x, y, a, p, b, backend='CPU')

# N.B.: By specifying backend='CPU', we can make sure that the result is computed using a simple C++ for loop.
print('Time to compute the convolution operation on the cpu: ', round(time.time()-start,5), 's', end=' ')

#######################################################################
# We compare with the unstable, naive computation "Log of Sum of Exp":

my_routine2 = Genred('Exp('+formula+')*'+formula2, variables, reduction_op='Sum', axis=1, dtype=dtype)
c2 = torch.log(my_routine2(x, y, a, p, b, backend='CPU'))
print('(relative error: ',((c2-c).norm()/c.norm()).item(), ')')

# Plot the results next to each other:
for i in range(3):
    plt.subplot(3, 1, i+1)