Exemple #1
0
def SART2DBackWard(grad, order, sp):
    """Sweep the views in reverse ``order``, updating ``grad`` after each one.

    For every view the current ``grad`` is uploaded to the 2D CUDA array
    bound to the ``texImage`` texture reference and ``fGetFp_kernel`` fills
    a buffer of ``sp['nBins']`` floats.  That buffer is uploaded to the 1D
    CUDA array bound to ``texFP`` and ``AssignResidualError_kernel`` is
    launched on ``grad``.  Finally ``grad`` is rebound to
    ``grad_in - sp['relax_factor'] * grad``, where ``grad_in`` is a copy of
    the array that was passed in.

    Args:
        grad: Array to process; it must provide ``copy()`` and be accepted
            by ``CUDAarray.copy_from``.  Presumably a float32 CuPy array
            laid out as (nHeight, nWidth) -- TODO confirm.
        order: Indexable collection of view indices.  Entries
            ``0 .. sp['nViews'] - 1`` are read, last one first.
        sp: Mapping of scan parameters.  Keys read here: ``nBins``,
            ``nWidth``, ``nHeight``, ``nViews``, ``fRotateDir``,
            ``nNumAngle``, ``nStartAngle``, ``fSod``, ``fOdd``,
            ``fCellSize``, ``fPixelSize``, ``fFovRadius``, ``fOffSet``,
            ``fAngleOfSlope`` and ``relax_factor``.

    Returns:
        The array produced by the last update, or the ``grad`` argument
        itself when ``sp['nViews']`` is 0.
    """
    grad_in = grad.copy()

    # Launch configuration: 8 threads per bin along x for the 1D kernel and
    # 8x8 threads for the 2D kernel, with the grid rounded up to cover all
    # bins / pixels.
    threads_1d = (8, 1)
    blocks_1d = ((sp['nBins'] + threads_1d[0] - 1) // threads_1d[0], 1)
    threads_2d = (8, 8)
    blocks_2d = (
        (sp['nWidth'] + threads_2d[0] - 1) // threads_2d[0],
        (sp['nHeight'] + threads_2d[1] - 1) // threads_2d[1],
    )

    module = cupy.RawModule(code=source_texref)
    residual_kernel = module.get_function('AssignResidualError_kernel')
    fp_kernel = module.get_function('fGetFp_kernel')

    clamp = runtime.cudaAddressModeClamp

    # 2D texture resources: one 32-bit float channel, nWidth x nHeight.
    img_format = ChannelFormatDescriptor(
        32, 0, 0, 0, runtime.cudaChannelFormatKindFloat)
    img_array = CUDAarray(img_format, sp['nWidth'], sp['nHeight'])
    img_resource = ResourceDescriptor(
        runtime.cudaResourceTypeArray, img_array)
    img_texture = TextureDescriptor(
        (clamp, clamp),
        runtime.cudaFilterModePoint,
        runtime.cudaReadModeElementType)

    # 1D texture resources: one 32-bit float channel, nBins long.
    proj_format = ChannelFormatDescriptor(
        32, 0, 0, 0, runtime.cudaChannelFormatKindFloat)
    proj_array = CUDAarray(proj_format, sp['nBins'])
    proj_resource = ResourceDescriptor(
        runtime.cudaResourceTypeArray, proj_array)
    proj_texture = TextureDescriptor(
        (clamp, clamp),
        runtime.cudaFilterModePoint,
        runtime.cudaReadModeElementType)

    one_view = cupy.zeros(sp['nBins'], cupy.float32)

    view_count = sp['nViews']
    for slot in reversed(range(view_count)):
        view = order[slot]
        angle = sp['fRotateDir'] * 2.0 * np.pi / float(
            sp['nNumAngle']) * float(view + sp['nStartAngle'])
        cos_angle = np.cos(angle)
        sin_angle = np.sin(angle)

        # Argument groups shared by both kernel launches.
        distances = (sp['fSod'], sp['fOdd'],
                     sp['fCellSize'], sp['fPixelSize'])
        detector = (sp['fOffSet'], sp['fAngleOfSlope'])

        # A*x (original author's note).
        img_array.copy_from(grad)
        TextureReference(
            module.get_texref('texImage'), img_resource, img_texture)
        fp_kernel(
            blocks_1d, threads_1d,
            (one_view, sp['nBins']) + distances
            + (sp['fFovRadius'], cos_angle, sin_angle, view) + detector)

        # AT*A*x (original author's note).
        # NOTE(review): on the first pass `grad` is still the caller's
        # array, so it is modified in place if the kernel writes to its
        # first argument -- confirm against the kernel source.
        proj_array.copy_from(one_view)
        TextureReference(
            module.get_texref('texFP'), proj_resource, proj_texture)
        residual_kernel(
            blocks_2d, threads_2d,
            (grad, sp['nWidth'], sp['nHeight'], sp['nBins']) + distances
            + (cos_angle, sin_angle) + detector + (sp['relax_factor'],))

        grad = grad_in - sp['relax_factor'] * grad
    return grad
Exemple #2
0
def SART2D(p, sp, order, x0):
    """Visit the views in ``order`` and run the two SART kernels on ``x0``.

    For every view ``x0`` is uploaded to the 2D CUDA array bound to the
    ``texImage`` texture reference and ``fGetResiduals`` fills a buffer of
    ``sp['nBins']`` floats.  That buffer is uploaded to the 1D CUDA array
    bound to ``texFP`` and ``AssignResidualError_kernel`` is launched on
    ``x0``.

    Args:
        p: Passed unchanged as the second argument of ``fGetResiduals``;
            presumably the measured projection data -- TODO confirm.
        sp: Mapping of scan parameters.  Keys read here: ``nBins``,
            ``nWidth``, ``nHeight``, ``nViews``, ``fRotateDir``,
            ``nNumAngle``, ``nStartAngle``, ``fSod``, ``fOdd``,
            ``fCellSize``, ``fPixelSize``, ``fFovRadius``, ``fOffSet``,
            ``fAngleOfSlope`` and ``relax_factor``.
        order: Indexable collection of view indices.  Entries
            ``0 .. sp['nViews'] - 1`` are read in ascending position.
        x0: Array accepted by ``CUDAarray.copy_from`` and by the kernels;
            presumably a float32 CuPy array laid out as (nHeight, nWidth)
            -- TODO confirm.

    Returns:
        The same ``x0`` object that was passed in.
    """
    # NOTE(review): x0 is never copied or rebound here, so any update made
    # by the kernel lands in the caller's array -- confirm this is intended.

    # Launch configuration: 8 threads per bin along x for the 1D kernel and
    # 8x8 threads for the 2D kernel, with the grid rounded up to cover all
    # bins / pixels.
    threads_1d = (8, 1)
    blocks_1d = ((sp['nBins'] + threads_1d[0] - 1) // threads_1d[0], 1)
    threads_2d = (8, 8)
    blocks_2d = (
        (sp['nWidth'] + threads_2d[0] - 1) // threads_2d[0],
        (sp['nHeight'] + threads_2d[1] - 1) // threads_2d[1],
    )

    module = cupy.RawModule(code=source_texref)
    residuals_kernel = module.get_function('fGetResiduals')
    assign_kernel = module.get_function('AssignResidualError_kernel')

    clamp = runtime.cudaAddressModeClamp

    # 2D texture resources: one 32-bit float channel, nWidth x nHeight.
    img_format = ChannelFormatDescriptor(
        32, 0, 0, 0, runtime.cudaChannelFormatKindFloat)
    img_array = CUDAarray(img_format, sp['nWidth'], sp['nHeight'])
    img_resource = ResourceDescriptor(
        runtime.cudaResourceTypeArray, img_array)
    img_texture = TextureDescriptor(
        (clamp, clamp),
        runtime.cudaFilterModePoint,
        runtime.cudaReadModeElementType)

    # 1D texture resources: one 32-bit float channel, nBins long.
    proj_format = ChannelFormatDescriptor(
        32, 0, 0, 0, runtime.cudaChannelFormatKindFloat)
    proj_array = CUDAarray(proj_format, sp['nBins'])
    proj_resource = ResourceDescriptor(
        runtime.cudaResourceTypeArray, proj_array)
    proj_texture = TextureDescriptor(
        (clamp, clamp),
        runtime.cudaFilterModePoint,
        runtime.cudaReadModeElementType)

    residuals = cupy.zeros(sp['nBins'], cupy.float32)

    for slot in range(sp['nViews']):
        view = order[slot]
        angle = sp['fRotateDir'] * 2.0 * np.pi / float(
            sp['nNumAngle']) * float(view + sp['nStartAngle'])
        cos_angle = np.cos(angle)
        sin_angle = np.sin(angle)

        # Argument groups shared by both kernel launches.
        distances = (sp['fSod'], sp['fOdd'],
                     sp['fCellSize'], sp['fPixelSize'])
        detector = (sp['fOffSet'], sp['fAngleOfSlope'])

        # Upload the current x0 and launch the per-bin residual kernel.
        img_array.copy_from(x0)
        TextureReference(
            module.get_texref('texImage'), img_resource, img_texture)
        residuals_kernel(
            blocks_1d, threads_1d,
            (residuals, p, sp['nBins']) + distances
            + (sp['fFovRadius'], cos_angle, sin_angle, view) + detector)

        # Upload the residual buffer and launch the per-pixel kernel on x0.
        proj_array.copy_from(residuals)
        TextureReference(
            module.get_texref('texFP'), proj_resource, proj_texture)
        assign_kernel(
            blocks_2d, threads_2d,
            (x0, sp['nWidth'], sp['nHeight'], sp['nBins']) + distances
            + (cos_angle, sin_angle) + detector + (sp['relax_factor'],))
    return x0
Exemple #3
0
    def test_fetch_float4_texture(self):
        """Fetch a 4-channel float 3D texture and check every channel.

        Random float32 data is copied into a 47 x 39 x 11 ``CUDAarray``
        with four 32-bit float channels.  Depending on ``self.target`` the
        array is exposed through a texture object or bound to the
        ``texref3Df4`` texture reference.  The ``copyKernel3D_4ch`` kernel
        then fills one output array per channel, and each must equal the
        matching stride-4 slice of the input.
        """
        width, height, depth = 47, 39, 11
        n_channel = 4

        # The validation below expects channel c of texel i at index
        # n_channel * i + c on the last axis of the input.
        src_shape = (depth, height, n_channel * width)
        dst_shape = (depth, height, width)

        # Input data, one output buffer per channel, and texture memory.
        tex_data = cupy.random.random(src_shape, dtype=cupy.float32)
        channel_outputs = tuple(
            cupy.zeros(dst_shape, dtype=cupy.float32)
            for _ in range(n_channel))
        fmt = ChannelFormatDescriptor(
            32, 32, 32, 32, runtime.cudaChannelFormatKindFloat)
        cu_array = CUDAarray(fmt, width, height, depth)
        cu_array.copy_from(tex_data)

        # Resource and texture descriptors.
        res_desc = ResourceDescriptor(
            runtime.cudaResourceTypeArray, cuArr=cu_array)
        clamp = runtime.cudaAddressModeClamp
        tex_desc = TextureDescriptor(
            (clamp, clamp),
            runtime.cudaFilterModePoint,
            runtime.cudaReadModeElementType)

        use_texobj = self.target == 'object'
        if use_texobj:
            texobj = TextureObject(res_desc, tex_desc)
            mod = cupy.RawModule(code=source_texobj)
        else:  # self.target == 'reference'
            mod = cupy.RawModule(code=source_texref)
            # Bind the texture reference to the resource; the name is not
            # used afterwards, hence the noqa.
            texref = TextureReference(  # noqa
                mod.get_texref('texref3Df4'), res_desc, tex_desc)

        # Launch with the grid rounded up to cover every texel.
        kernel = mod.get_function('copyKernel3D_4ch')
        extents = (width, height, depth)
        block = (4, 4, 2)
        grid = tuple((n + b - 1) // b for n, b in zip(extents, block))
        launch_args = channel_outputs
        if use_texobj:
            launch_args += (texobj, )
        launch_args += extents
        kernel(grid, block, launch_args)

        # Each output must reproduce its own channel of the input.
        for channel, fetched in enumerate(channel_outputs):
            assert (fetched == tex_data[..., channel::n_channel]).all()
Exemple #4
0
    def test_fetch_float_texture(self):
        """Fetch a single-channel float texture and compare with its source.

        ``self.dimensions`` gives ``(width, height, depth)``; a zero extent
        means that axis is unused, which selects a 1D, 2D or 3D case.
        ``self.mem_type`` selects the backing resource (``'CUDAarray'``,
        ``'linear'`` or ``'pitch2D'``) and ``self.target`` selects a texture
        object or a texture reference.  The test is skipped when
        ``'linear'`` is combined with anything but 1D, or ``'pitch2D'``
        with anything but 2D.
        """
        width, height, depth = self.dimensions
        if depth != 0:
            dim = 3
        elif height != 0:
            dim = 2
        else:
            dim = 1

        mem_type = self.mem_type
        inapplicable = ((mem_type == 'linear' and dim != 1)
                        or (mem_type == 'pitch2D' and dim != 2))
        if inapplicable:
            pytest.skip('The test case {0} is inapplicable for {1} and thus '
                        'skipped.'.format(self.dimensions, self.mem_type))

        # Keep only the trailing `dim` extents: (depth, height, width),
        # (height, width) or (width,).
        shape = (depth, height, width)[3 - dim:]

        # Input data, output buffer and channel format.
        tex_data = cupy.random.random(shape, dtype=cupy.float32)
        real_output = cupy.zeros_like(tex_data)
        ch = ChannelFormatDescriptor(
            32, 0, 0, 0, runtime.cudaChannelFormatKindFloat)
        assert tex_data.flags['C_CONTIGUOUS']
        assert real_output.flags['C_CONTIGUOUS']

        # Backing memory and its resource descriptor.
        if mem_type == 'CUDAarray':
            arr = CUDAarray(ch, width, height, depth)
            expected_output = cupy.zeros_like(tex_data)
            assert expected_output.flags['C_CONTIGUOUS']
            # Exercise the copy in both directions.
            arr.copy_from(tex_data)
            arr.copy_to(expected_output)
            res = ResourceDescriptor(runtime.cudaResourceTypeArray, cuArr=arr)
        else:
            # linear and pitch2D are backed by the ndarray itself.
            arr = tex_data
            expected_output = tex_data
            if mem_type == 'linear':
                res = ResourceDescriptor(
                    runtime.cudaResourceTypeLinear,
                    arr=arr,
                    chDesc=ch,
                    sizeInBytes=arr.size * arr.dtype.itemsize)
            else:  # pitch2D
                # This relies on the hand-picked array shape meeting the
                # alignment requirement, which is a CUDA limitation (see
                # the CUDA Runtime API reference guide).
                # "TexturePitchAlignment" is assumed to be 32, which should
                # be applicable for most devices.
                res = ResourceDescriptor(
                    runtime.cudaResourceTypePitch2D,
                    arr=arr,
                    chDesc=ch,
                    width=width,
                    height=height,
                    pitchInBytes=width * arr.dtype.itemsize)

        clamp = runtime.cudaAddressModeClamp
        tex = TextureDescriptor(
            (clamp, clamp),
            runtime.cudaFilterModePoint,
            runtime.cudaReadModeElementType)

        suffix = '{}D'.format(dim)
        use_texobj = self.target == 'object'
        if use_texobj:
            texobj = TextureObject(res, tex)
            mod = cupy.RawModule(code=source_texobj)
        else:  # self.target == 'reference'
            mod = cupy.RawModule(code=source_texref)
            # Bind the texture reference to the resource; the name is not
            # used afterwards, hence the noqa.
            texref = TextureReference(  # noqa
                mod.get_texref('texref' + suffix), res, tex)

        # Kernel name: copyKernel<dim>D, plus 'fetch' for linear memory.
        ker_name = 'copyKernel' + suffix
        if mem_type == 'linear':
            ker_name += 'fetch'
        ker = mod.get_function(ker_name)

        # Only the active extents contribute to the grid (rounded up to
        # cover every element) and to the trailing kernel arguments.
        extents = (width, height, depth)[:dim]
        block = (4, 4, 2)[:dim]
        grid = tuple((n + b - 1) // b for n, b in zip(extents, block))
        args = (real_output, )
        if use_texobj:
            args += (texobj, )
        args += extents
        ker(grid, block, args)

        # The fetched values must match the source data exactly.
        assert (real_output == expected_output).all()