def _prep_texture(self):
    """Create a texture object backed by random float32 data.

    Reads ``self.dimensions`` (width, height, depth; 0 marks an unused
    trailing axis), stores the logical array shape in ``self.shape`` and
    the generated input in ``self.data``, and returns a ``TextureObject``
    bound to a CUDA array holding a copy of that data.
    """
    w, h, d = self.dimensions

    # Zero-sized trailing axes collapse the dimensionality (3-D/2-D/1-D).
    if d != 0:
        self.shape = (d, h, w)
    elif h != 0:
        self.shape = (h, w)
    else:
        self.shape = (w,)

    # Random input is kept on self.data so callers can compare against
    # values fetched back through the texture.
    src = cupy.random.random(self.shape, dtype=cupy.float32)
    self.data = src

    # Single-channel 32-bit float CUDA array, filled from the input.
    fmt = ChannelFormatDescriptor(32, 0, 0, 0,
                                  runtime.cudaChannelFormatKindFloat)
    cu_arr = CUDAarray(fmt, w, h, d)
    cu_arr.copy_from(src)

    # Describe the resource and how it is sampled: clamped addressing,
    # no filtering, raw element reads.
    res_desc = ResourceDescriptor(runtime.cudaResourceTypeArray,
                                  cuArr=cu_arr)
    clamp = (runtime.cudaAddressModeClamp, runtime.cudaAddressModeClamp)
    tex_desc = TextureDescriptor(clamp, runtime.cudaFilterModePoint,
                                 runtime.cudaReadModeElementType)
    return TextureObject(res_desc, tex_desc)
def deskew(data, angle, dx, dz, rotate=True, return_resolution=True, out=None):
    """Deskew a 3-D stack by shearing each Y-slice along X on the GPU.

    Args:
        data (ndarray): 3-D array to apply deskew
        angle (float): angle between the objective and coverslip, in degree
        dx (float): X resolution
        dz (float): Z resolution
        rotate (bool, optional): rotate and crop the output
        return_resolution (bool, optional): return deskewed X/Z resolution
        out (ndarray, optional): array to store the result

    NOTE(review): ``rotate`` and ``out`` are accepted but never used by
    this implementation — confirm whether that is intentional.
    """
    angle = radians(angle)

    # shift along X axis, in pixels, contributed by each successive Z layer
    shift = dz * cos(angle) / dx
    logger.debug(f"layer shift: {shift:.04f} px")

    # estimate new size: X grows by the accumulated shear across all layers
    nw, nv, nu = data.shape
    nz, ny, nx = nw, nv, nu + ceil(shift * (nw - 1))

    # upload texture: a float32 CUDA array sized for one (Z, X) slice;
    # border addressing makes out-of-range fetches read as zero, and
    # linear filtering interpolates the sub-pixel shifts
    ch = ChannelFormatDescriptor(32, 0, 0, 0,
                                 runtime.cudaChannelFormatKindFloat)
    arr = CUDAarray(ch, nu, nw)
    res = ResourceDescriptor(runtime.cudaResourceTypeArray, cuArr=arr)
    address_mode = (runtime.cudaAddressModeBorder,
                    runtime.cudaAddressModeBorder)
    tex = TextureDescriptor(address_mode, runtime.cudaFilterModeLinear,
                            runtime.cudaReadModeElementType)

    # transpose so iteration is over Y, each layer being a (Z, X) slice
    data = np.swapaxes(data, 0, 1)
    data = np.ascontiguousarray(data)
    data_in = data.astype(np.float32)
    data_out = cp.empty((ny, nz, nx), np.float32)
    for i, layer in enumerate(data_in):
        # re-fill the single texture array for every layer
        arr.copy_from(layer)  # TODO use stream
        texobj = TextureObject(res, tex)
        kernels["shear_kernel"](
            (ceil(nx / 16), ceil(nz / 16)),
            (16, 16),
            (data_out[i, ...], texobj, nx, nz, nu, np.float32(shift)),
        )

    # undo the transpose, bring the result back to host memory, and
    # restore the caller's dtype
    data_out = cp.swapaxes(data_out, 0, 1)
    data_out = cp.asnumpy(data_out)
    data_out = data_out.astype(data.dtype)

    if return_resolution:
        # new resolution: Z spacing scaled by sin(angle) after deskew
        dz *= sin(angle)
        return data_out, (dz, dx)
    else:
        return data_out
def test_fetch_float4_texture(self):
    """Fetch a 3-D float4 texture and verify each channel per texel."""
    width, height, depth, n_channel = 47, 39, 11, 4

    # Input is channel-interleaved along X; outputs hold one plane per
    # channel.
    in_shape = (depth, height, n_channel * width)
    out_shape = (depth, height, width)
    tex_data = cupy.random.random(in_shape, dtype=cupy.float32)
    outputs = [cupy.zeros(out_shape, dtype=cupy.float32) for _ in range(4)]

    # Four 32-bit float channels per texel.
    ch = ChannelFormatDescriptor(32, 32, 32, 32,
                                 runtime.cudaChannelFormatKindFloat)
    arr = CUDAarray(ch, width, height, depth)
    arr.copy_from(tex_data)

    # Describe the resource and sampling: clamped, unfiltered, raw reads.
    res = ResourceDescriptor(runtime.cudaResourceTypeArray, cuArr=arr)
    address_mode = (runtime.cudaAddressModeClamp,
                    runtime.cudaAddressModeClamp)
    tex = TextureDescriptor(address_mode, runtime.cudaFilterModePoint,
                            runtime.cudaReadModeElementType)

    if self.target == 'object':
        # Texture object: handed to the kernel as an argument.
        texobj = TextureObject(res, tex)
        mod = cupy.RawModule(code=source_texobj)
    else:  # self.target == 'reference'
        # Texture reference: bound to the module-level symbol instead.
        mod = cupy.RawModule(code=source_texref)
        texrefPtr = mod.get_texref('texref3Df4')
        texref = TextureReference(texrefPtr, res, tex)  # noqa

    # Launch one thread per output texel.
    ker = mod.get_function('copyKernel3D_4ch')
    block = (4, 4, 2)
    grid = tuple((extent + threads - 1) // threads
                 for extent, threads in zip((width, height, depth), block))

    args = tuple(outputs)
    if self.target == 'object':
        args += (texobj,)
    args += (width, height, depth)
    ker(grid, block, args)

    # Each output plane must equal its interleaved channel of the input.
    for channel, out in enumerate(outputs):
        assert (out == tex_data[..., channel::4]).all()
def test_fetch_float_texture(self):
    """Fetch a single-channel float texture (1-D/2-D/3-D) through each
    supported backing memory type and verify the copied-out values."""
    width, height, depth = self.dimensions
    # 0 in a trailing dimension marks it as unused
    dim = 3 if depth != 0 else 2 if height != 0 else 1

    # linear memory only supports 1-D; pitch2D only supports 2-D
    if (self.mem_type == 'linear' and dim != 1) or \
            (self.mem_type == 'pitch2D' and dim != 2):
        pytest.skip('The test case {0} is inapplicable for {1} and thus '
                    'skipped.'.format(self.dimensions, self.mem_type))

    # generate input data and allocate output buffer
    shape = (depth, height, width) if dim == 3 else \
        (height, width) if dim == 2 else \
        (width,)

    # prepare input, output, and texture memory
    tex_data = cupy.random.random(shape, dtype=cupy.float32)
    real_output = cupy.zeros_like(tex_data)
    # single 32-bit float channel
    ch = ChannelFormatDescriptor(32, 0, 0, 0,
                                 runtime.cudaChannelFormatKindFloat)
    assert tex_data.flags['C_CONTIGUOUS']
    assert real_output.flags['C_CONTIGUOUS']
    if self.mem_type == 'CUDAarray':
        arr = CUDAarray(ch, width, height, depth)
        expected_output = cupy.zeros_like(tex_data)
        assert expected_output.flags['C_CONTIGUOUS']
        # test bidirectional copy: round-trip through the CUDA array
        arr.copy_from(tex_data)
        arr.copy_to(expected_output)
    else:  # linear and pitch2D are backed by ndarray
        arr = tex_data
        expected_output = tex_data

    # create resource and texture descriptors for the chosen backing store
    if self.mem_type == 'CUDAarray':
        res = ResourceDescriptor(runtime.cudaResourceTypeArray, cuArr=arr)
    elif self.mem_type == 'linear':
        res = ResourceDescriptor(runtime.cudaResourceTypeLinear,
                                 arr=arr,
                                 chDesc=ch,
                                 sizeInBytes=arr.size * arr.dtype.itemsize)
    else:  # pitch2D
        # In this case, we rely on the fact that the hand-picked array
        # shape meets the alignment requirement. This is CUDA's limitation,
        # see CUDA Runtime API reference guide. "TexturePitchAlignment" is
        # assumed to be 32, which should be applicable for most devices.
        res = ResourceDescriptor(runtime.cudaResourceTypePitch2D,
                                 arr=arr, chDesc=ch, width=width,
                                 height=height,
                                 pitchInBytes=width * arr.dtype.itemsize)
    address_mode = (runtime.cudaAddressModeClamp,
                    runtime.cudaAddressModeClamp)
    tex = TextureDescriptor(address_mode, runtime.cudaFilterModePoint,
                            runtime.cudaReadModeElementType)

    if self.target == 'object':
        # create a texture object (passed as a kernel argument)
        texobj = TextureObject(res, tex)
        mod = cupy.RawModule(code=source_texobj)
    else:  # self.target == 'reference'
        mod = cupy.RawModule(code=source_texref)
        # pick the module symbol matching the dimensionality
        texref_name = 'texref'
        texref_name += '3D' if dim == 3 else '2D' if dim == 2 else '1D'
        texrefPtr = mod.get_texref(texref_name)
        # bind texture ref to resource
        texref = TextureReference(texrefPtr, res, tex)  # noqa

    # get and launch the kernel; linear memory uses the 'fetch' variant
    ker_name = 'copyKernel'
    ker_name += '3D' if dim == 3 else '2D' if dim == 2 else '1D'
    ker_name += 'fetch' if self.mem_type == 'linear' else ''
    ker = mod.get_function(ker_name)
    block = (4, 4, 2) if dim == 3 else (4, 4) if dim == 2 else (4, )
    grid = ()
    args = (real_output, )
    if self.target == 'object':
        args = args + (texobj, )
    # build grid/args one axis at a time so they match the dimensionality
    if dim >= 1:
        grid_x = (width + block[0] - 1) // block[0]
        grid = grid + (grid_x, )
        args = args + (width, )
    if dim >= 2:
        grid_y = (height + block[1] - 1) // block[1]
        grid = grid + (grid_y, )
        args = args + (height, )
    if dim == 3:
        grid_z = (depth + block[2] - 1) // block[2]
        grid = grid + (grid_z, )
        args = args + (depth, )
    ker(grid, block, args)

    # validate result
    assert (real_output == expected_output).all()