Exemple #1
0
    def _axpbyz(self,
                selffac,
                other,
                otherfac,
                out,
                add_timer=None,
                stream=None):
        """Compute ``out = selffac * self + otherfac * other``.

        `other` is a vector with the same shape as `self`.

        :arg selffac: factor applied to `self`.
        :arg other: second operand; must match ``self.shape`` and be
            contiguous.
        :arg otherfac: factor applied to `other`.
        :arg out: array whose ``gpudata`` receives the result; its dtype
            takes part in kernel selection.
        :arg add_timer: optional callable. When given, the kernel is run
            through ``prepared_timed_call`` and `add_timer` is invoked with
            ``3 * self.size`` and the value that call returns.
        :arg stream: stream handed to ``prepared_async_call`` when no timer
            is requested; it is not used on the timed path.
        :returns: `out`.
        :raises RuntimeError: if `self` or `other` is not contiguous.
        """
        assert self.shape == other.shape
        if not self.flags.forc or not other.flags.forc:
            raise RuntimeError("only contiguous arrays may "
                               "be used as arguments to this operation")

        func = elementwise.get_axpbyz_kernel(self.dtype, other.dtype,
                                             out.dtype)

        if add_timer is not None:
            # The block shape must follow the grid, exactly as in the async
            # launch below; leaving it out shifts every kernel argument by
            # one position.
            add_timer(
                3 * self.size,
                func.prepared_timed_call(self._grid, self._block, selffac,
                                         self.gpudata, otherfac,
                                         other.gpudata, out.gpudata,
                                         self.mem_size))
        else:
            func.prepared_async_call(self._grid, self._block, stream, selffac,
                                     self.gpudata, otherfac, other.gpudata,
                                     out.gpudata, self.mem_size)

        return out
Exemple #2
0
    def _axpbyz(self, selffac, other, otherfac, out, add_timer=None, stream=None):
        """Compute ``out = selffac * self + otherfac * other``.

        `other` is a vector with the same shape as `self`.

        :arg selffac: factor applied to `self`.
        :arg other: second operand; must match ``self.shape`` and be
            contiguous.
        :arg otherfac: factor applied to `other`.
        :arg out: array whose ``gpudata`` receives the result; its dtype
            takes part in kernel selection.
        :arg add_timer: optional callable. When given, the kernel is run
            through ``prepared_timed_call`` and `add_timer` is invoked with
            ``3 * self.size`` and the value that call returns.
        :arg stream: stream handed to ``prepared_async_call`` when no timer
            is requested; it is not used on the timed path.
        :returns: `out`.
        :raises RuntimeError: if `self` or `other` is not contiguous.
        """
        assert self.shape == other.shape
        if not self.flags.forc or not other.flags.forc:
            raise RuntimeError("only contiguous arrays may " "be used as arguments to this operation")

        func = elementwise.get_axpbyz_kernel(self.dtype, other.dtype, out.dtype)

        if add_timer is not None:
            # Pass the block shape right after the grid so the timed launch
            # uses the same (grid, block, ...) layout as the async launch;
            # without it `selffac` lands in the block slot.
            add_timer(
                3 * self.size,
                func.prepared_timed_call(
                    self._grid,
                    self._block,
                    selffac,
                    self.gpudata,
                    otherfac,
                    other.gpudata,
                    out.gpudata,
                    self.mem_size,
                ),
            )
        else:
            func.prepared_async_call(
                self._grid,
                self._block,
                stream,
                selffac,
                self.gpudata,
                otherfac,
                other.gpudata,
                out.gpudata,
                self.mem_size,
            )

        return out
Exemple #3
0
    def _axpbyz(self,
                selffac,
                other,
                otherfac,
                out,
                add_timer=None,
                stream=None):
        """Evaluate ``out = selffac * self + otherfac * other``.

        `other` is a vector whose shape must equal ``self.shape``.

        The kernel is chosen from the dtypes of `self`, `other` and `out`,
        and its block shape is set from ``self._block`` before launching.
        If `add_timer` is given, the kernel goes through
        ``prepared_timed_call`` and `add_timer` receives ``3 * self.size``
        together with that call's return value; otherwise the kernel is
        launched with ``prepared_async_call`` on `stream`.

        Returns `out`.
        """
        assert self.shape == other.shape

        kernel = elementwise.get_axpbyz_kernel(
            self.dtype, other.dtype, out.dtype)
        kernel.set_block_shape(*self._block)

        # Untimed path: launch asynchronously and finish early.
        if add_timer is None:
            kernel.prepared_async_call(
                self._grid, stream,
                selffac, self.gpudata,
                otherfac, other.gpudata,
                out.gpudata, self.mem_size)
            return out

        # Timed path: report the launch result alongside 3 * size.
        work = 3 * self.size
        timed_result = kernel.prepared_timed_call(
            self._grid,
            selffac, self.gpudata,
            otherfac, other.gpudata,
            out.gpudata, self.mem_size)
        add_timer(work, timed_result)
        return out
Exemple #4
0
    def _axpbyz(self, selffac, other, otherfac, out, add_timer=None, stream=None):
        """Evaluate ``out = selffac * self + otherfac * other``.

        `other` is a vector whose shape must equal ``self.shape``.

        The kernel is looked up by the dtypes of `self`, `other` and `out`,
        and ``self._block`` is installed as its block shape. When
        `add_timer` is supplied, it is called with ``3 * self.size`` and the
        result of ``prepared_timed_call``; otherwise the kernel is launched
        through ``prepared_async_call`` on `stream`.

        Returns `out`.
        """
        assert self.shape == other.shape

        axpbyz = elementwise.get_axpbyz_kernel(self.dtype, other.dtype, out.dtype)
        axpbyz.set_block_shape(*self._block)

        # Both launch variants take the same trailing kernel arguments.
        kernel_args = (
            selffac,
            self.gpudata,
            otherfac,
            other.gpudata,
            out.gpudata,
            self.mem_size,
        )

        if add_timer is not None:
            add_timer(
                3 * self.size,
                axpbyz.prepared_timed_call(self._grid, *kernel_args),
            )
        else:
            axpbyz.prepared_async_call(self._grid, stream, *kernel_args)

        return out