コード例 #1
0
    def forward(ctx, formula, aliases, backend, dtype, device_id, ranges, accuracy_flags, *args):
        """Load the KeOps routine for ``formula`` and run it on ``args``.

        Args:
            ctx: Context object; every non-tensor argument is stored on it as
                an attribute and the tensors are registered through
                ``ctx.save_for_backward`` so that the gradient can be computed.
            formula: Formula handed to ``LoadKeOps``.
            aliases: Aliases handed to ``LoadKeOps``.
            backend: Backend specification, decoded by ``get_tag_backend``.
            dtype: Data type handed to ``LoadKeOps``.
            device_id: Device index forwarded to the routine; replaced by the
                index of ``args[0]``'s device when both ``tagCPUGPU`` and
                ``tagHostDevice`` equal 1.
            ranges: Ranges forwarded to the routine; ``None`` is converted to
                an empty tuple.
            accuracy_flags: List of extra compiler flags, appended to the
                PyTorch include-dir flag.
            *args: Input tensors of the routine.

        Returns:
            The output of ``myconv.genred_pytorch``.

        Raises:
            ValueError: If the device check is active and the inputs are not
                all located on the same device.
        """
        optional_flags = ['-DPYTORCH_INCLUDE_DIR=' + ';'.join(include_dirs)] + accuracy_flags

        myconv = LoadKeOps(formula, aliases, dtype, 'torch', optional_flags).import_module()

        # Context variables: save everything to compute the gradient.
        # N.B.: ctx keeps the *caller-supplied* device_id and ranges, i.e. the
        # values seen before they are normalised below.
        ctx.formula = formula
        ctx.aliases = aliases
        ctx.backend = backend
        ctx.dtype = dtype
        ctx.device_id = device_id
        ctx.ranges = ranges
        ctx.accuracy_flags = accuracy_flags
        ctx.myconv = myconv

        tagCPUGPU, tag1D2D, tagHostDevice = get_tag_backend(backend, args)

        # Logical `and`, not bitwise `&`: `a == 1 & b == 1` parses as the
        # chained comparison `a == (1 & b) == 1`, which only matched the
        # intended test by accident for 0/1 tags.
        if tagCPUGPU == 1 and tagHostDevice == 1:
            device_id = args[0].device.index
            for arg in args[1:]:
                if arg.device.index != device_id:
                    raise ValueError("[KeOps] Input arrays must be all located on the same device.")

        if ranges is None:
            ranges = ()  # To keep the same type

        result = myconv.genred_pytorch(tagCPUGPU, tag1D2D, tagHostDevice, device_id, ranges, *args)

        # Relying on the 'ctx.saved_variables' attribute is necessary if you want to be able to
        # differentiate the output of the backward once again. It helps pytorch to keep track of
        # 'who is who'.
        ctx.save_for_backward(*args, result)

        return result
コード例 #2
0
ファイル: operations.py プロジェクト: yechengxi/keops
    def forward(ctx, formula, aliases, varinvpos, alpha, backend, dtype,
                device_id, eps, ranges, accuracy_flags, *args):
        """Solve a linear system whose operator is a KeOps reduction.

        The operator ``linop`` replaces the argument at position ``varinvpos``
        with its input, applies the KeOps routine and, when ``alpha`` is
        truthy, adds ``alpha * var``. The system is handed to
        ``ConjugateGradientSolver``.

        Args:
            ctx: Context object; every non-tensor argument is stored on it as
                an attribute and the tensors are registered through
                ``ctx.save_for_backward`` so that the gradient can be computed.
            formula: Formula handed to ``LoadKeOps``.
            aliases: Aliases handed to ``LoadKeOps`` and ``parse_aliases``.
            varinvpos: Index, in ``args``, of the variable that ``linop``
                substitutes.
            alpha: Coefficient of the ``alpha * var`` term added by ``linop``;
                skipped when falsy.
            backend: Backend specification, decoded by ``get_tag_backend``.
            dtype: Data type handed to ``LoadKeOps``.
            device_id: Device index forwarded to the routine; replaced by the
                index of ``args[0]``'s device when both ``tagCPUGPU`` and
                ``tagHostDevice`` equal 1.
            eps: Value forwarded to ``ConjugateGradientSolver``
                (presumably its stopping tolerance -- TODO confirm).
            ranges: Ranges forwarded to the routine; ``None`` is converted to
                an empty tuple.
            accuracy_flags: List of extra compiler flags, appended to the
                PyTorch include-dir flag.
            *args: Input tensors of the routine.

        Returns:
            The value returned by ``ConjugateGradientSolver``.

        Raises:
            ValueError: If the device check is active and the inputs are not
                all located on the same device.
        """
        optional_flags = ['-DPYTORCH_INCLUDE_DIR=' + ';'.join(include_dirs)
                          ] + accuracy_flags

        myconv = LoadKeOps(formula, aliases, dtype, 'torch',
                           optional_flags).import_module()

        # Context variables: save everything to compute the gradient.
        # N.B.: ctx keeps the *caller-supplied* device_id and ranges, i.e. the
        # values seen before they are normalised below.
        ctx.formula = formula
        ctx.aliases = aliases
        ctx.varinvpos = varinvpos
        ctx.alpha = alpha
        ctx.backend = backend
        ctx.dtype = dtype
        ctx.device_id = device_id
        ctx.eps = eps
        ctx.myconv = myconv
        ctx.ranges = ranges
        ctx.accuracy_flags = accuracy_flags
        if ranges is None:
            ranges = ()  # To keep the same type

        varinv = args[varinvpos]

        tagCPUGPU, tag1D2D, tagHostDevice = get_tag_backend(backend, args)

        # Logical `and`, not bitwise `&`: `a == 1 & b == 1` parses as the
        # chained comparison `a == (1 & b) == 1`, which only matched the
        # intended test by accident for 0/1 tags.
        if tagCPUGPU == 1 and tagHostDevice == 1:
            device_id = args[0].device.index
            for arg in args[1:]:
                if arg.device.index != device_id:
                    raise ValueError(
                        "[KeOps] Input arrays must be all located on the same device."
                    )

        (categories, dimensions) = parse_aliases(aliases)

        def linop(var):
            # Apply the routine with `var` substituted at position varinvpos.
            newargs = args[:varinvpos] + (var, ) + args[varinvpos + 1:]
            res = myconv.genred_pytorch(tagCPUGPU, tag1D2D, tagHostDevice,
                                        device_id, ranges, categories,
                                        dimensions, *newargs)
            if alpha:
                res += alpha * var
            return res

        # The solver is fed `varinv.data`, i.e. the raw tensor without its
        # autograd history.
        result = ConjugateGradientSolver('torch', linop, varinv.data, eps)

        # Relying on the 'ctx.saved_variables' attribute is necessary if you want to be able to
        # differentiate the output of the backward once again. It helps pytorch to keep track of
        # 'who is who'.
        ctx.save_for_backward(*args, result)

        return result
コード例 #3
0
    def forward(ctx, formula, aliases, backend, dtype, device_id, ranges,
                optional_flags, rec_multVar_highdim, nx, ny, *args):
        """Load the KeOps routine for ``formula`` and run it on ``args``.

        Args:
            ctx: Context object; every non-tensor argument is stored on it as
                an attribute and the tensors are registered through
                ``ctx.save_for_backward`` so that the gradient can be computed.
            formula: Formula handed to ``LoadKeOps``.
            aliases: Aliases handed to ``LoadKeOps``.
            backend: Backend specification, decoded by ``get_tag_backend``.
            dtype: Data type handed to ``LoadKeOps``.
            device_id: Device index forwarded to the routine; replaced by the
                index of ``args[0]``'s device when both ``tagCPUGPU`` and
                ``tagHostDevice`` equal 1.
            ranges: Iterable of range tensors, or ``None`` (converted to an
                empty tuple). Each entry is made contiguous before the call.
            optional_flags: List of compiler flags. It is **not** modified:
                a snapshot is stored in ``ctx.optional_flags`` and the
                ``-DMULT_VAR_HIGHDIM=1`` flag is added to a private copy only.
            rec_multVar_highdim: When not ``None``, enables the
                ``MULT_VAR_HIGHDIM`` compiler option for this formula.
            nx: Forwarded to ``genred_pytorch`` and stored on ``ctx``.
            ny: Forwarded to ``genred_pytorch`` and stored on ``ctx``.
            *args: Input tensors of the routine.

        Returns:
            The output of ``myconv.genred_pytorch``.

        Raises:
            ValueError: If the device check is active and the inputs are not
                all located on the same device.
        """
        # N.B. when rec_multVar_highdim option is set, it means that formula is of the form
        # "sum(F*b)", where b is a variable with large dimension. In this case we set compiler
        # option MULT_VAR_HIGHDIM to allow for the use of the special "final chunk" computation
        # mode. However, this may not be also true for the gradients of the same formula. In fact
        # only the gradient with respect to variable b will have the same form. Hence, we save
        # optional_flags current status into ctx, before adding the MULT_VAR_HIGHDIM compiler
        # option.
        ctx.optional_flags = optional_flags.copy()
        if rec_multVar_highdim is not None:
            # Rebind to a new list rather than `+=`: the in-place form extended
            # the caller's list, so the flag piled up across calls and leaked
            # into the ctx snapshot taken on every subsequent call.
            optional_flags = optional_flags + ["-DMULT_VAR_HIGHDIM=1"]

        myconv = LoadKeOps(formula, aliases, dtype, 'torch', optional_flags,
                           include_dirs).import_module()

        # Context variables: save everything to compute the gradient.
        # N.B.: ctx keeps the *caller-supplied* device_id and ranges, i.e. the
        # values seen before they are normalised below.
        ctx.formula = formula
        ctx.aliases = aliases
        ctx.backend = backend
        ctx.dtype = dtype
        ctx.device_id = device_id
        ctx.ranges = ranges
        ctx.rec_multVar_highdim = rec_multVar_highdim
        ctx.myconv = myconv
        ctx.nx = nx
        ctx.ny = ny

        tagCPUGPU, tag1D2D, tagHostDevice = get_tag_backend(backend, args)

        # Logical `and`, not bitwise `&`: `a == 1 & b == 1` parses as the
        # chained comparison `a == (1 & b) == 1`, which only matched the
        # intended test by accident for 0/1 tags.
        if tagCPUGPU == 1 and tagHostDevice == 1:
            device_id = args[0].device.index
            for arg in args[1:]:
                if arg.device.index != device_id:
                    raise ValueError(
                        "[KeOps] Input arrays must be all located on the same device."
                    )

        if ranges is None:
            ranges = ()  # To keep the same type

        # N.B.: KeOps C++ expects contiguous integer arrays as ranges
        ranges = tuple(r.contiguous() for r in ranges)

        result = myconv.genred_pytorch(tagCPUGPU, tag1D2D, tagHostDevice,
                                       device_id, ranges, nx, ny, *args)

        # Relying on the 'ctx.saved_variables' attribute is necessary if you want to be able to
        # differentiate the output of the backward once again. It helps pytorch to keep track of
        # 'who is who'.
        ctx.save_for_backward(*args, result)

        return result
コード例 #4
0
ファイル: operations.py プロジェクト: kshitij12345/keops
    def forward(ctx, formula, aliases, varinvpos, alpha, backend, dtype,
                device_id, eps, ranges, optional_flags, rec_multVar_highdim,
                *args):
        """Solve a linear system whose operator is a KeOps reduction.

        The operator ``linop`` replaces the argument at position ``varinvpos``
        with its input, applies the KeOps routine and, when ``alpha`` is
        truthy, adds ``alpha * var``. The system is handed to
        ``ConjugateGradientSolver``.

        Args:
            ctx: Context object; every non-tensor argument is stored on it as
                an attribute and the tensors are registered through
                ``ctx.save_for_backward`` so that the gradient can be computed.
            formula: Formula handed to ``LoadKeOps``.
            aliases: Aliases handed to ``LoadKeOps``.
            varinvpos: Index, in ``args``, of the variable that ``linop``
                substitutes.
            alpha: Coefficient of the ``alpha * var`` term added by ``linop``;
                skipped when falsy.
            backend: Backend specification, decoded by ``get_tag_backend``.
            dtype: Data type handed to ``LoadKeOps``.
            device_id: Device index forwarded to the routine; replaced by the
                index of ``args[0]``'s device when both ``tagCPUGPU`` and
                ``tagHostDevice`` equal 1.
            eps: Value forwarded to ``ConjugateGradientSolver``
                (presumably its stopping tolerance -- TODO confirm).
            ranges: Ranges forwarded to the routine; ``None`` is converted to
                an empty tuple.
            optional_flags: List of compiler flags. It is **not** modified:
                ``include_dirs`` and ``-DMULT_VAR_HIGHDIM=1`` are added to
                private copies only.
            rec_multVar_highdim: When not ``None``, enables the
                ``MULT_VAR_HIGHDIM`` compiler option for this formula.
            *args: Input tensors of the routine.

        Returns:
            The value returned by ``ConjugateGradientSolver``.

        Raises:
            ValueError: If the device check is active and the inputs are not
                all located on the same device.
        """
        # Build a fresh list instead of `optional_flags += include_dirs`, which
        # extended the caller's list in place on every call.
        optional_flags = [*optional_flags, *include_dirs]

        # N.B. when rec_multVar_highdim option is set, it means that formula is of the form
        # "sum(F*b)", where b is a variable with large dimension. In this case we set compiler
        # option MULT_VAR_HIGHDIM to allow for the use of the special "final chunk" computation
        # mode. However, this may not be also true for the gradients of the same formula. In fact
        # only the gradient with respect to variable b will have the same form. Hence, we save
        # optional_flags current status into ctx, before adding the MULT_VAR_HIGHDIM compiler
        # option.
        ctx.optional_flags = list(optional_flags)
        if rec_multVar_highdim is not None:
            # Rebind (no `+=`) so that the snapshot stored on ctx stays free of
            # the MULT_VAR_HIGHDIM flag, as the note above requires.
            optional_flags = optional_flags + ["-DMULT_VAR_HIGHDIM=1"]

        myconv = LoadKeOps(formula, aliases, dtype, "torch",
                           optional_flags).import_module()

        # Context variables: save everything to compute the gradient.
        # N.B.: ctx.optional_flags is deliberately NOT reassigned here; doing
        # so would overwrite the snapshot with the MULT_VAR_HIGHDIM variant.
        # ctx also keeps the *caller-supplied* device_id and ranges.
        ctx.formula = formula
        ctx.aliases = aliases
        ctx.varinvpos = varinvpos
        ctx.alpha = alpha
        ctx.backend = backend
        ctx.dtype = dtype
        ctx.device_id = device_id
        ctx.eps = eps
        ctx.myconv = myconv
        ctx.ranges = ranges
        ctx.rec_multVar_highdim = rec_multVar_highdim
        if ranges is None:
            ranges = ()  # To keep the same type

        varinv = args[varinvpos]

        tagCPUGPU, tag1D2D, tagHostDevice = get_tag_backend(backend, args)

        # Logical `and`, not bitwise `&`: `a == 1 & b == 1` parses as the
        # chained comparison `a == (1 & b) == 1`, which only matched the
        # intended test by accident for 0/1 tags.
        if tagCPUGPU == 1 and tagHostDevice == 1:
            device_id = args[0].device.index
            for arg in args[1:]:
                if arg.device.index != device_id:
                    raise ValueError(
                        "[KeOps] Input arrays must be all located on the same device."
                    )

        def linop(var):
            # Apply the routine with `var` substituted at position varinvpos.
            newargs = args[:varinvpos] + (var, ) + args[varinvpos + 1:]
            res = myconv.genred_pytorch(tagCPUGPU, tag1D2D, tagHostDevice,
                                        device_id, ranges, *newargs)
            if alpha:
                res += alpha * var
            return res

        # The solver is fed `varinv.data`, i.e. the raw tensor without its
        # autograd history.
        result = ConjugateGradientSolver("torch", linop, varinv.data, eps)

        # Relying on the 'ctx.saved_variables' attribute is necessary if you want to be able to
        # differentiate the output of the backward once again. It helps pytorch to keep track of
        # 'who is who'.
        ctx.save_for_backward(*args, result)

        return result
コード例 #5
0
    def __call__(self,
                 *args,
                 backend='auto',
                 device_id=-1,
                 alpha=1e-10,
                 eps=1e-6,
                 ranges=None):
        r"""
        Run the solver on arbitrary NumPy arrays.

        The array stored at position ``self.varinvpos`` of ``args`` is handed
        to ``ConjugateGradientSolver`` together with a linear operator that
        substitutes its input at that position, applies the KeOps routine
        and, when ``alpha`` is non-zero, adds ``alpha * var``.

        Warning:
            Even for variables of size 1 (e.g. :math:`a_i\in\mathbb{R}`
            for :math:`i\in[0,M)`), KeOps expects inputs to be formatted
            as 2d arrays of size ``(M,dim)``: a vector of weights has to be
            turned into a *list of scalar values*,
            e.g. with ``a.reshape(-1, 1)``.

        Args:
            *args (2d arrays (variables ``Vi(..)``, ``Vj(..)``) and 1d arrays (parameters ``Pm(..)``)): The input numerical arrays,
                which should all have the same ``dtype``, be **contiguous**
                and be stored on the **same device**. KeOps expects one array
                per alias, with the following compatibility rules:

                    - All ``Vi(Dim_k)`` variables are encoded as **2d-arrays** with ``Dim_k`` columns and the same number of lines :math:`M`.
                    - All ``Vj(Dim_k)`` variables are encoded as **2d-arrays** with ``Dim_k`` columns and the same number of lines :math:`N`.
                    - All ``Pm(Dim_k)`` variables are encoded as **1d-arrays** (vectors) of size ``Dim_k``.

        Keyword Args:
            backend (string): Specifies the map-reduce scheme,
                as detailed in the documentation
                of the :class:`numpy.Genred <pykeops.numpy.Genred>` module.

            device_id (int, default=-1): Specifies the GPU that should be
                used to perform the computation; a negative value lets your
                system choose the default GPU. This parameter is only useful
                if your system has access to several GPUs.

            alpha (float, default = 1e-10): Non-negative
                **ridge regularization** parameter, added to the diagonal
                of the Kernel matrix :math:`K_{xx}`.

            eps (float, default = 1e-6): Forwarded as ``eps`` to
                ``ConjugateGradientSolver``.

            ranges (6-uple of integer arrays, None by default):
                Ranges of integers that specify a
                :doc:`block-sparse reduction scheme <../../sparsity>`
                with *Mc clusters along axis 0* and *Nc clusters along axis 1*,
                as detailed in the documentation
                of the :class:`numpy.Genred <pykeops.numpy.Genred>` module.

                If **None** (default), we simply use a **dense Kernel matrix**
                as we loop over all indices
                :math:`i\in[0,M)` and :math:`j\in[0,N)`.

        Returns:
            (M,D) or (N,D) array:

            The solution of the optimization problem, which is always a
            **2d-array** with :math:`M` or :math:`N` lines (if **axis** = 1
            or **axis** = 0, respectively) and a number of columns
            that is inferred from the **formula**.

        """
        # Decode the backend; the third tag is not needed on the NumPy side.
        cpu_gpu_tag, scheme_tag, _ = get_tag_backend(backend, args)

        pos = self.varinvpos
        unknown = args[pos]

        # The routine expects a tuple, never None.
        ranges = () if ranges is None else ranges

        head, tail = args[:pos], args[pos + 1:]

        def linop(var):
            # Same argument list as the caller's, with `var` in slot `pos`.
            substituted = (*head, var, *tail)
            out = self.myconv.genred_numpy(cpu_gpu_tag, scheme_tag, 0,
                                           device_id, ranges, *substituted)
            if not alpha:
                return out
            out += alpha * var
            return out

        return ConjugateGradientSolver('numpy', linop, unknown, eps=eps)
コード例 #6
0
ファイル: generic_red.py プロジェクト: MrHuff/keops
    def __call__(self, *args, backend='auto', device_id=-1, ranges=None):
        r"""
        Run the reduction on arbitrary NumPy arrays.

        Warning:
            Even for variables of size 1 (e.g. :math:`a_i\in\mathbb{R}`
            for :math:`i\in[0,M)`), KeOps expects inputs to be formatted
            as 2d arrays of size ``(M,dim)``: a vector of weights has to be
            turned into a *list of scalar values*,
            e.g. with ``a.reshape(-1, 1)``.

        Args:
            *args (2d arrays (variables ``Vi(..)``, ``Vj(..)``) and 1d arrays (parameters ``Pm(..)``)): The input numerical arrays,
                which should all have the same ``dtype``, be **contiguous**
                and be stored on the **same device**. KeOps expects one array
                per alias, with the following compatibility rules:

                    - All ``Vi(Dim_k)`` variables are encoded as **2d-arrays** with ``Dim_k`` columns and the same number of lines :math:`M`.
                    - All ``Vj(Dim_k)`` variables are encoded as **2d-arrays** with ``Dim_k`` columns and the same number of lines :math:`N`.
                    - All ``Pm(Dim_k)`` variables are encoded as **1d-arrays** (vectors) of size ``Dim_k``.

        Keyword Args:
            backend (string): Specifies the map-reduce scheme.
                The supported values are:

                    - ``"auto"`` (default): let KeOps decide which backend is best suited to your data, based on the arrays' shapes. ``"GPU_1D"`` will be chosen in most cases.
                    - ``"CPU"``: use a simple C++ ``for`` loop on a single CPU core.
                    - ``"GPU_1D"``: use a `simple multithreading scheme <https://github.com/getkeops/keops/blob/master/keops/core/GpuConv1D.cu>`_ on the GPU - basically, one thread per value of the output index.
                    - ``"GPU_2D"``: use a more sophisticated `2D parallelization scheme <https://github.com/getkeops/keops/blob/master/keops/core/GpuConv2D.cu>`_ on the GPU.
                    - ``"GPU"``: let KeOps decide which one of the ``"GPU_1D"`` or the ``"GPU_2D"`` scheme will run faster on the given input.

            device_id (int, default=-1): Specifies the GPU that should be used
                to perform the computation; a negative value lets your system
                choose the default GPU. This parameter is only useful if your
                system has access to several GPUs.

            ranges (6-uple of integer arrays, None by default):
                Ranges of integers that specify a
                :doc:`block-sparse reduction scheme <../../sparsity>`
                with *Mc clusters along axis 0* and *Nc clusters along axis 1*.
                If None (default), we simply loop over all indices
                :math:`i\in[0,M)` and :math:`j\in[0,N)`.

                **The first three ranges** are used if **axis** = 1
                (reduction along the axis of ":math:`j` variables"),
                and to compute gradients with respect to ``Vi(..)`` variables:

                    - ``ranges_i``, (Mc,2) integer array - slice indices
                      :math:`[\operatorname{start}^I_k,\operatorname{end}^I_k)` in :math:`[0,M]`
                      that specify our Mc blocks along the axis 0
                      of ":math:`i` variables".
                    - ``slices_i``, (Mc,) integer array - consecutive slice indices
                      :math:`[\operatorname{end}^S_1, ..., \operatorname{end}^S_{M_c}]`
                      that specify Mc ranges :math:`[\operatorname{start}^S_k,\operatorname{end}^S_k)` in ``redranges_j``,
                      with :math:`\operatorname{start}^S_k = \operatorname{end}^S_{k-1}`.
                      **The first 0 is implicit**, meaning that :math:`\operatorname{start}^S_0 = 0`, and we typically expect that
                      ``slices_i[-1] == len(redrange_j)``.
                    - ``redranges_j``, (Mcc,2) integer array - slice indices
                      :math:`[\operatorname{start}^J_l,\operatorname{end}^J_l)` in :math:`[0,N]`
                      that specify reduction ranges along the axis 1
                      of ":math:`j` variables".

                If **axis** = 1, these integer arrays state
                that ``for k in range(Mc)``, the output values for
                indices ``i in range( ranges_i[k,0], ranges_i[k,1] )``
                should be computed using a Map-Reduce scheme over
                indices ``j in Union( range( redranges_j[l, 0], redranges_j[l, 1] ))``
                for ``l in range( slices_i[k-1], slices_i[k] )``.

                **Likewise, the last three ranges** are used if **axis** = 0
                (reduction along the axis of ":math:`i` variables"),
                and to compute gradients with respect to ``Vj(..)`` variables:

                    - ``ranges_j``, (Nc,2) integer array - slice indices
                      :math:`[\operatorname{start}^J_k,\operatorname{end}^J_k)` in :math:`[0,N]`
                      that specify our Nc blocks along the axis 1
                      of ":math:`j` variables".
                    - ``slices_j``, (Nc,) integer array - consecutive slice indices
                      :math:`[\operatorname{end}^S_1, ..., \operatorname{end}^S_{N_c}]`
                      that specify Nc ranges :math:`[\operatorname{start}^S_k,\operatorname{end}^S_k)` in ``redranges_i``,
                      with :math:`\operatorname{start}^S_k = \operatorname{end}^S_{k-1}`.
                      **The first 0 is implicit**, meaning that :math:`\operatorname{start}^S_0 = 0`, and we typically expect that
                      ``slices_j[-1] == len(redrange_i)``.
                    - ``redranges_i``, (Ncc,2) integer array - slice indices
                      :math:`[\operatorname{start}^I_l,\operatorname{end}^I_l)` in :math:`[0,M]`
                      that specify reduction ranges along the axis 0
                      of ":math:`i` variables".

                If **axis** = 0, these integer arrays state
                that ``for k in range(Nc)``, the output values for
                indices ``j in range( ranges_j[k,0], ranges_j[k,1] )``
                should be computed using a Map-Reduce scheme over
                indices ``i in Union( range( redranges_i[l, 0], redranges_i[l, 1] ))``
                for ``l in range( slices_j[k-1], slices_j[k] )``.

        Returns:
            (M,D) or (N,D) array:

            The output of the reduction,
            a **2d-array** with :math:`M` or :math:`N` lines (if **axis** = 1
            or **axis** = 0, respectively) and a number of columns
            that is inferred from the **formula**.
        """

        # Decode the backend; the third tag is not needed on the NumPy side.
        cpu_gpu_tag, scheme_tag, _ = get_tag_backend(backend, args)

        # The routine expects a tuple, never None.
        ranges = () if ranges is None else ranges

        raw = self.myconv.genred_numpy(cpu_gpu_tag, scheme_tag, 0, device_id,
                                       ranges, *args)

        # Number of output lines: M when reducing over j (axis 1), N otherwise.
        n_i, n_j = get_sizes(self.aliases, *args)
        if self.axis == 1:
            n_lines = n_i
        else:
            n_lines = n_j

        return postprocess(raw, "numpy", self.reduction_op, n_lines,
                           self.opt_arg, self.dtype)