Example #1
0
def main():
    # setup OpenCL
    platforms = cl.get_platforms(
    )  # a platform corresponds to a driver (e.g. AMD, NVidia, Intel)
    platform = platforms[0]  # take first platform
    devices = platform.get_devices(
        cl.device_type.GPU)  # get GPU devices of selected platform
    device = devices[0]  # take first GPU
    context = cl.Context([device])  # put selected GPU into context object
    queue = cl.CommandQueue(
        context, device)  # create command queue for selected GPU and context

    # prepare data
    imgIn = cv2.imread('photographer.png', cv2.IMREAD_GRAYSCALE)

    rotation_angle = np.pi / 4
    cos_theta = np.cos(rotation_angle)
    sin_theta = np.sin(rotation_angle)

    # setup sampler
    sampler = cl.Sampler(context, True, cl.addressing_mode.REPEAT,
                         cl.filter_mode.NEAREST)

    # get shape of input image, allocate memory for output to which result can be copied to
    shape = imgIn.T.shape
    imgOut = np.empty_like(imgIn)

    # create image buffers which hold images for OpenCL
    imgInBuf = cl.image_from_array(context,
                                   ary=imgIn,
                                   mode="r",
                                   norm_int=True,
                                   num_channels=1)
    imgOutBuf = cl.image_from_array(context,
                                    ary=imgOut,
                                    mode="w",
                                    norm_int=True,
                                    num_channels=1)

    # load, compile and execute OpenCL program
    program = cl.Program(context, open('kernel.cl').read()).build()
    program.img_rotate(queue, shape, None, sampler, imgInBuf, imgOutBuf,
                       np.double(sin_theta), np.double(cos_theta))
    cl.enqueue_copy(
        queue,
        imgOut,
        imgOutBuf,
        origin=(0, 0),
        region=shape,
        is_blocking=True
    )  # wait until finished copying resulting image back from GPU to CPU

    # write output image
    cv2.imwrite('photographer_rotated.png', imgOut)

    # show images
    fig, ax = plt.subplots(1, 2)
    ax[0].imshow(imgIn, cmap='gray')
    ax[1].imshow(imgOut, cmap='gray')
    plt.show()
Example #2
0
    def __init__(self, queue, discr, dtype, allocator):
        context = queue.context
        self.discr = discr
        import pyopencl as cl

        self.allocator = allocator

        dtype4 = cl.array.vec.types[np.dtype(dtype), 4]

        l = discr.ldis
        drdsdt_unvec = np.zeros((l.Np, l.Np, 4), dtype)
        for i, mat in enumerate([l.Dr, l.Ds, l.Dt]):
            drdsdt_unvec[:, :, i] = mat

        self.drdsdt = cl.array.to_device(
            queue,
            drdsdt_unvec.view(dtype=dtype4)[:, :, 0].copy(order="F"))
        self.drdsdt_img = cl.image_from_array(
            context,
            drdsdt_unvec.view(dtype=dtype4)[:, :, 0])

        drst_dx_unvec = np.zeros((discr.K, 4), dtype)
        drst_dy_unvec = np.zeros((discr.K, 4), dtype)
        drst_dz_unvec = np.zeros((discr.K, 4), dtype)

        for i in range(3):
            drst_dx_unvec[:, i] = discr.drst_dxyz[i, 0][:, 0]
            drst_dy_unvec[:, i] = discr.drst_dxyz[i, 1][:, 0]
            drst_dz_unvec[:, i] = discr.drst_dxyz[i, 2][:, 0]

        self.drst_dx = cl.array.to_device(
            queue,
            drst_dx_unvec.view(dtype=dtype4)[:, 0])
        self.drst_dy = cl.array.to_device(
            queue,
            drst_dy_unvec.view(dtype=dtype4)[:, 0])
        self.drst_dz = cl.array.to_device(
            queue,
            drst_dz_unvec.view(dtype=dtype4)[:, 0])

        self.vmapP = cl.array.to_device(
            queue,
            discr.vmapP.astype(np.int32).copy().reshape(discr.K, -1))
        self.vmapM = cl.array.to_device(
            queue,
            discr.vmapM.astype(np.int32).copy().reshape(discr.K, -1))

        self.nx = cl.array.to_device(queue, discr.nx.astype(dtype))
        self.ny = cl.array.to_device(queue, discr.ny.astype(dtype))
        self.nz = cl.array.to_device(queue, discr.nz.astype(dtype))

        self.Fscale = cl.array.to_device(queue, discr.Fscale.astype(dtype))
        self.bc = cl.array.to_device(queue, discr.bc.astype(dtype))

        self.LIFT = cl.array.to_device(queue,
                                       l.LIFT.copy(order="F").astype(dtype))
        self.LIFT_img = cl.image_from_array(context, l.LIFT.astype(dtype))

        self.volume_events = []
        self.surface_events = []
Example #3
0
    def __call__(self, ctx, src, kernel1, kernel2):
        self.build(ctx)
        kernel1 = np.array(kernel1, copy=False, dtype=np.float32)
        kernel2 = np.array(kernel2, copy=False, dtype=np.float32)
        halflen = kernel1.shape[0] / 2
        kernel1_buf = cl.Buffer(self.ctx,
                                cl.mem_flags.READ_ONLY
                                | cl.mem_flags.COPY_HOST_PTR,
                                hostbuf=kernel1)
        kernel2_buf = cl.Buffer(self.ctx,
                                cl.mem_flags.READ_ONLY
                                | cl.mem_flags.COPY_HOST_PTR,
                                hostbuf=kernel2)

        inshape = src.shape
        src = np.asarray(src)
        dims = len(src.shape)
        assert dims > 1
        if dims == 2:
            assert src.shape[1] <= 4
            src = src.reshape(src.shape[0], 1, src.shape[1])
        else:
            assert src.shape[2] <= 4

        src_padded = np.zeros((src.shape[0] + 2 * halflen, 1, 4),
                              dtype=src.dtype)
        src_padded[
            halflen:-halflen, :, :src.shape[2]] = src[:, :, :src.shape[2]]

        src_padded[:halflen, :, :] = src_padded[halflen:halflen *
                                                2, :, :][::-1, ...]
        src_padded[-halflen:, :, :] = src_padded[-halflen *
                                                 2:-halflen, :, :][::-1, ...]

        norm = np.issubdtype(src.dtype, np.integer)
        src_buf = cl.image_from_array(self.ctx, src_padded, 4, norm_int=norm)
        dest = np.zeros((src.shape[0], src.shape[1], 4), dtype=src.dtype)
        dest_buf = cl.image_from_array(self.ctx,
                                       dest,
                                       4,
                                       mode="w",
                                       norm_int=norm)

        queue = cl.CommandQueue(self.ctx)
        self.prg.convolve1d2_naive(queue, (dest.shape[1], dest.shape[0]), None,
                                   src_buf, dest_buf, kernel1_buf, kernel2_buf,
                                   np.int32(halflen))
        cl.enqueue_copy(queue,
                        dest,
                        dest_buf,
                        origin=(0, 0),
                        region=(src.shape[1], src.shape[0])).wait()

        dest = dest[:, :, :src.shape[2]].reshape(inshape)

        src_buf.release()
        dest_buf.release()
        kernel1_buf.release()
        kernel2_buf.release()
        return dest
Example #4
0
 def __call__(self, ctx, src):
     self.build(ctx)
     src = np.asarray(src)
     src2 = np.zeros((src.shape[0], src.shape[1], 4), dtype=src.dtype)
     src2[:, :, 0:src.shape[2]] = src[:, :, 0:src.shape[2]]
     norm = np.issubdtype(src2.dtype, np.integer)
     src2_buf = cl.image_from_array(self.ctx, src2, 4, norm_int=norm)
     dest_buf = cl.image_from_array(self.ctx,
                                    src2,
                                    4,
                                    mode="w",
                                    norm_int=norm)
     dest = np.empty_like(src2)
     queue = cl.CommandQueue(self.ctx)
     self.prg.YCrCb2RGB(queue, (src2.shape[1], src2.shape[0]), None,
                        src2_buf, dest_buf)
     cl.enqueue_copy(queue,
                     dest,
                     dest_buf,
                     origin=(0, 0),
                     region=(src2.shape[1], src2.shape[0])).wait()
     dest = dest[:, :, 0:src.shape[2]].copy()
     src2_buf.release()
     dest_buf.release()
     return dest
Example #5
0
 def mask(self, mask):
     BaseCorrelator.mask.fset(self, mask)
     self._norm_factor = np.float32(self._norm_factor)
     self._rmax = np.int32(self._rmax)
     self._gtemplate = cl.image_from_array(
         self._queue.context, self._template.astype(np.float32))
     self._gmask = cl.image_from_array(self._queue.context,
                                       self._mask.astype(np.float32))
Example #6
0
def main():
    # setup OpenCL
    platforms = cl.get_platforms()  # a platform corresponds to a driver (e.g. AMD)
    platform = platforms[0]  # take first platform
    devices = platform.get_devices(cl.device_type.GPU)  # get GPU devices of selected platform
    device = devices[0]  # take first GPU
    context = cl.Context([device])  # put selected GPU into context object
    queue = cl.CommandQueue(context, device)  # create command queue for selected GPU and context

    # read image and setup convolution kernel image
    imgIn = cv2.imread('photographer.png', cv2.IMREAD_GRAYSCALE)
    tmp = [[1, 0, -1], [2, 0, -2], [1, 0, -1]]
    kernelImage = np.array(tmp, dtype=np.float32)  # dtype=np.float32, because defaults to dtype=np.float64, which is unsupported by OpenCL images
    # tmp = [[1, 2, 1], [2, 4, -2], [1, 2, 1]]
    # kernelImage = 1/16 * np.array(tmp, dtype=np.float32)  # dtype=np.float32, because defaults to dtype=np.float64, which is unsupported by OpenCL images
    # tmp = [[1, 4, 7, 4, 1],
    #        [4,16,26,16, 4],
    #        [7,26,41,26, 7],
    #        [4,16,26,16, 4],
    #        [1, 4, 7, 4, 1]]
    # kernelImage = 1/273 * np.array(tmp, dtype=np.float32)  # dtype=np.float32, because defaults to dtype=np.float64, which is unsupported by OpenCL images


    # get shape of input image, allocate memory for output to which result can be copied to
    shape = imgIn.T.shape
    imgOut = np.empty_like(imgIn)

    # create image buffers which hold images for OpenCL
    imgInBuf = cl.image_from_array(context, ary=imgIn, mode="r", norm_int=True, num_channels=1)
    kernelImageBuf = cl.image_from_array(context, ary=kernelImage, mode="r", norm_int=False, num_channels=1)
    imgOutBuf = cl.image_from_array(context, ary=imgOut, mode="w", norm_int=True, num_channels=1)

    # load and compile OpenCL program
    program = cl.Program(context, open('convolution_kernel_code.cl').read()).build()

    # execute kernel with same global shape as input image
    program.custom_convolution_2d(queue, shape, None, imgInBuf, kernelImageBuf, imgOutBuf)

    # copy back output buffer
    cl.enqueue_copy(queue, imgOut, imgOutBuf, origin=(0, 0), region=shape,
                    is_blocking=True)  # wait until finished copying resulting image back from GPU to CPU

    # save imgOut
    cv2.imwrite('photographer_convolved.png', imgOut)

    # show images
    fig, ax = plt.subplots(1, 2)
    ax[0].imshow(imgIn, cmap='gray')
    ax[1].imshow(imgOut, cmap='gray')
    plt.show()
Example #7
0
    def __init__(self, queue, discr, dtype, allocator):
        context = queue.context
        self.discr = discr
        import pyopencl as cl

        self.allocator = allocator

        dtype4 = cl.array.vec.types[np.dtype(dtype), 4]

        l = discr.ldis
        drdsdt_unvec = np.zeros((l.Np, l.Np, 4), dtype)
        for i, mat in enumerate([l.Dr, l.Ds, l.Dt]):
            drdsdt_unvec[:, :, i] = mat

        self.drdsdt = cl.array.to_device(
            queue,
            drdsdt_unvec
            .view(dtype=dtype4)[:, :, 0].copy(order="F"))
        self.drdsdt_img = cl.image_from_array(context, drdsdt_unvec.view(dtype=dtype4)[:, :, 0])

        drst_dx_unvec = np.zeros((discr.K, 4), dtype)
        drst_dy_unvec = np.zeros((discr.K, 4), dtype)
        drst_dz_unvec = np.zeros((discr.K, 4), dtype)

        for i in range(3):
            drst_dx_unvec[:, i] = discr.drst_dxyz[i, 0][:,0]
            drst_dy_unvec[:, i] = discr.drst_dxyz[i, 1][:,0]
            drst_dz_unvec[:, i] = discr.drst_dxyz[i, 2][:,0]

        self.drst_dx = cl.array.to_device(queue, drst_dx_unvec.view(dtype=dtype4)[:, 0])
        self.drst_dy = cl.array.to_device(queue, drst_dy_unvec.view(dtype=dtype4)[:, 0])
        self.drst_dz = cl.array.to_device(queue, drst_dz_unvec.view(dtype=dtype4)[:, 0])

        self.vmapP = cl.array.to_device(queue,
                discr.vmapP.astype(np.int32).copy().reshape(discr.K, -1))
        self.vmapM = cl.array.to_device(queue,
                discr.vmapM.astype(np.int32).copy().reshape(discr.K, -1))

        self.nx = cl.array.to_device(queue, discr.nx.astype(dtype))
        self.ny = cl.array.to_device(queue, discr.ny.astype(dtype))
        self.nz = cl.array.to_device(queue, discr.nz.astype(dtype))

        self.Fscale = cl.array.to_device(queue, discr.Fscale.astype(dtype))
        self.bc = cl.array.to_device(queue, discr.bc.astype(dtype))

        self.LIFT = cl.array.to_device(queue, l.LIFT.copy(order="F").astype(dtype))
        self.LIFT_img = cl.image_from_array(context, l.LIFT.astype(dtype))

        self.volume_events = []
        self.surface_events = []
Example #8
0
    def prepare_param_tables(self):
        filename = os.path.join(os.path.split(__file__)[0],
                                self.tabledatafile)
        d = np.load(filename)
        self.phases = d['coeffs_phase']
        Tx = self.preprocess_params(d['coeffs_x'])
        Ty = self.preprocess_params(d['coeffs_y'])

        if self.simple:
            Tx[:] = np.array([0., 2.65, 2.65, 1 ])[:,None,None] #x0, xi_p, xi_m, n 2.6/1 bzw. 3./1.1
            Ty[:] = np.array([0, 2., 2., 1.])[:,None,None]
        self.cl_params_x = cl.image_from_array(self.context, np.ascontiguousarray(Tx.T, np.float32), num_channels=4)
        self.cl_params_y = cl.image_from_array(self.context, np.ascontiguousarray(Ty.T, np.float32), num_channels=4)
        self.cl_table_E = cl.image_from_array(self.context, np.ascontiguousarray(self.calc_E().T, np.float32), num_channels=1)
    def _upload_image(self, image):
        assert image.max() <= 1.0

        # Check the number of channels in the image
        if image.ndim == 2:
            num_channels = 1
        else:
            if sys.platform.startswith('win') and 'geforce' in self.ctx.devices[0].name.lower() and image.shape[2] == 3:
                # This is a hack for Windows/nVidia, as we believe and found so
                # far for various GeFoce cards that the nvidia OpenCL
                # implementation sucks. Reporting an out-of-resources error when
                # trying to upload an RGB three channel image to the GPU
                # Quite counterintuitively adding an unneeded fourth channel
                # makes the out-of-resources error go away. FIXME if you can.
                tmp = image
                image = np.ones((tmp.shape[0], tmp.shape[1], 4))
                num_channels = 4
                image[:, :, :3] = tmp[:]
            else:
                num_channels = image.shape[2]

        # Tell OpenCL to copy the image into device memory
        image_gpu = cl.image_from_array(self.ctx, image.astype(np.float32),
                                        num_channels=num_channels, mode="r")
        return image_gpu
Example #10
0
    def find_starburst_ray_boundaries(self, im, seed_point, cutoff_index,
                                      threshold, n_rays, n_samples, ray_step):

        if self.cached_shape != im.shape:
            self.setup_device(im.shape)

        #(im_, _, _) = self.sobel3x3_separable(im.astype(np.float32))
        im_ = im.astype(np.float32)
        self.clIm2D = cl.image_from_array(self.ctx, im_, num_channels=1)
        # # load im to memory
        # cl.enqueue_copy(self.q, self.clIm2D, clIm.data, offset=0,
        #                 origin=(0, 0), region=clIm.shape)

        seed_point_ = (seed_point[1], seed_point[0])

        # sample the rays, computing the "ray-wise" gradient and mean + std along the
        # way.  We'll pull back the mean and running stds and compute the thresholds
        # on the CPU
        sampled = self.cl_find_ray_boundaries(self.clIm2D, n_rays,
                                     n_samples, ray_step, seed_point_, cutoff_index, threshold)

        # run through the resampled values to find cutoffs

        # pull back the cutoff and return them
        return sampled
Example #11
0
    def test_rotate_image3d_1(self):

        shape = (8, 6, 5)
        np_image = np.zeros(shape, dtype=np.float32)
        np_image[0, 0, 0] = 1
        np_image[0, 0, 1] = 1
        np_image[0, 0, 2] = 1

        # 90 degree rotation around z-axis
        rotmat = [[0, 1, 0], [1, 0, 0], [0, 0, 1]]

        np_out = np.zeros_like(np_image)

        expected = np.zeros_like(np_image)
        expected[0, 0, 0] = 1
        expected[0, 1, 0] = 1
        expected[0, 2, 0] = 1

        cl_image = cl.image_from_array(self.queue.context, np_image)
        cl_out = cl_array.to_device(self.queue, np_out)
        cl_sampler = cl.Sampler(self.queue.context, False,
                                cl.addressing_mode.CLAMP,
                                cl.filter_mode.LINEAR)

        self.kernels.rotate_image3d(self.queue, cl_sampler, cl_image, rotmat,
                                    cl_out)

        self.assertTrue(np.allclose(expected, cl_out.get()))
    def find_starburst_ray_boundaries(self, im, seed_point, cutoff_index,
                                      threshold, n_rays, n_samples, ray_step):

        if self.cached_shape != im.shape:
            self.setup_device(im.shape)

        #(im_, _, _) = self.sobel3x3_separable(im.astype(np.float32))
        im_ = im.astype(np.float32)
        self.clIm2D = cl.image_from_array(self.ctx, im_, num_channels=1)
        # # load im to memory
        # cl.enqueue_copy(self.q, self.clIm2D, clIm.data, offset=0,
        #                 origin=(0, 0), region=clIm.shape)

        seed_point_ = (seed_point[1], seed_point[0])

        # sample the rays, computing the "ray-wise" gradient and mean + std along the
        # way.  We'll pull back the mean and running stds and compute the thresholds
        # on the CPU
        sampled = self.cl_find_ray_boundaries(self.clIm2D, n_rays,
                                     n_samples, ray_step, seed_point_, cutoff_index, threshold)

        # run through the resampled values to find cutoffs

        # pull back the cutoff and return them
        return sampled
Example #13
0
 def __call__(self, ctx, src):
     self.build(ctx)
     src = np.asarray(src)
     src2 = np.zeros((src.shape[0], src.shape[1], 4),dtype=src.dtype)
     src2[:,:,0:src.shape[2]] = src[:,:,0:src.shape[2]]
     norm = np.issubdtype(src2.dtype, np.integer)
     src2_buf = cl.image_from_array(self.ctx, src2, 4, norm_int=norm)
     dest_buf = cl.image_from_array(self.ctx, src2, 4, mode="w", norm_int=norm)
     dest = np.empty_like(src2)
     queue = cl.CommandQueue(self.ctx)
     self.prg.YCrCb2RGB(queue, (src2.shape[1], src2.shape[0]), None, src2_buf, dest_buf)
     cl.enqueue_copy(queue, dest, dest_buf, origin=(0, 0), region=(src2.shape[1], src2.shape[0])).wait()
     dest = dest[:,:,0:src.shape[2]].copy()
     src2_buf.release()
     dest_buf.release()
     return dest
Example #14
0
File: cl.py Project: romi/RL_NBV
    def process_view(self, intrinsics, rot, tvec, mask):
        """Process a new view.
        Parameters
        ----------
        intrinsics: list
            [f_x, f_y, c_x, c_y]
        rot: list of list
            rotation matrix of the camera pose
        tvec: list
            translation vector of the camera pose
        mask: np.ndarray
            mask array (or float array if type is averaging)
        """
        if self.dtype == np.float32 and mask.dtype == np.uint8:
            mask = mask / 255

        intrinsics_h = np.ascontiguousarray(intrinsics).astype(np.float32)
        rot_h = np.ascontiguousarray(rot).astype(np.float32)
        tvec_h = np.ascontiguousarray(tvec).astype(np.float32)
        mask_h = np.ascontiguousarray(mask).astype(self.dtype)

        mask_d = cl.image_from_array(ctx, mask_h, 1)
        cl.enqueue_copy(queue, self.intrinsics_d, intrinsics_h)
        cl.enqueue_copy(queue, self.rot_d, rot_h)
        cl.enqueue_copy(queue, self.tvec_d, tvec_h)

        self.kernel(queue, [np.prod(self.shape)], None, mask_d, self.values_d,
                    self.intrinsics_d, self.rot_d, self.tvec_d, self.volinfo_d,
                    self.shape_d)
        queue.finish()
Example #15
0
    def setUpClass(self):

        self.queue = get_queue()
        self.shape = (4, 5, 6)
        self.size = 4 * 5 * 6
        self.values = {
            'shape_x': self.shape[2],
            'shape_y': self.shape[1],
            'shape_z': self.shape[0],
            'llength': 2,
        }
        self.k = CLKernels(self.queue.context, self.values)
        self.grid = np.zeros(self.shape, dtype=np.float64)
        self.grid[0, 0, 0] = 1
        self.grid[0, 0, 1] = 1
        self.grid[0, 1, 1] = 1
        self.grid[0, 0, 2] = 1
        self.grid[0, 0, -1] = 1
        self.grid[-1, 0, 0] = 1
        self.cl_image = cl.image_from_array(self.queue.context,
                                            self.grid.astype(np.float32))
        self.sampler_linear = cl.Sampler(self.queue.context, True,
                                         cl.addressing_mode.REPEAT,
                                         cl.filter_mode.LINEAR)
        self.sampler_nearest = cl.Sampler(self.queue.context, False,
                                          cl.addressing_mode.CLAMP,
                                          cl.filter_mode.NEAREST)
        self.out = np.zeros(self.shape, dtype=np.float64)
        self.cl_out = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
Example #16
0
    def test_rotate_image3d_1(self):
        
        shape = (8, 6, 5)
        np_image = np.zeros(shape, dtype=np.float32)
        np_image[0, 0, 0] = 1
        np_image[0, 0, 1] = 1
        np_image[0, 0, 2] = 1

        # 90 degree rotation around z-axis
        rotmat = [[0, 1, 0],
                  [1, 0, 0],
                  [0, 0, 1]]

        np_out = np.zeros_like(np_image)

        expected = np.zeros_like(np_image)
        expected[0, 0, 0] = 1
        expected[0, 1, 0] = 1
        expected[0, 2, 0] = 1

        cl_image = cl.image_from_array(self.queue.context, np_image)
        cl_out = cl_array.to_device(self.queue, np_out)
        cl_sampler = cl.Sampler(self.queue.context, False, cl.addressing_mode.CLAMP, cl.filter_mode.LINEAR)

        self.kernels.rotate_image3d(self.queue, cl_sampler, cl_image, rotmat, cl_out)

        self.assertTrue(np.allclose(expected, cl_out.get()))
Example #17
0
    def from_array(cls, arr, *args, **kwargs):

        ctx = get_device().context
        if not arr.ndim in [2, 3, 4]:
            raise ValueError(
                "dimension of array wrong, should be 1...4 but is %s" %
                arr.ndim)
        elif arr.ndim == 4:
            num_channels = arr.shape[-1]
        else:
            num_channels = 1

        if arr.dtype.type == np.complex64:
            num_channels = 2
            res = OCLImage.empty(arr.shape,
                                 dtype=np.float32,
                                 num_channels=num_channels)
            res.write_array(arr)
            res.dtype = np.float32
        else:
            res = cl.image_from_array(ctx,
                                      prepare(arr),
                                      num_channels=num_channels,
                                      *args,
                                      **kwargs)

            res.dtype = arr.dtype

        res.num_channels = num_channels

        return res
Example #18
0
 def mask(self, mask):
     BaseCorrelator.mask.fset(self, mask)
     self._norm_factor = np.float32(self._norm_factor)
     self._rmax = np.int32(self._rmax)
     self._gtemplate = cl.image_from_array(
             self._ctx, self._template.astype(np.float32)
             )
     self._gmask = cl.image_from_array(
             self._ctx, self._mask.astype(np.float32)
             )
     max_items = self._queue.device.max_compute_units * 32 * 16
     gws = [0] * 3
     gws[0] = min(2 * self._rmax, max_items)
     gws[1] = min(max_items // gws[0], 2 * self._rmax)
     gws[2] = min(max(max_items // (gws[0] * gws[0]), 1), 2 * self._rmax)
     self._gws = tuple(gws)
Example #19
0
    def test_clashvol(self):

        NROT = np.random.randint(self.rotations.shape[0] + 1)
        rotmat = self.rotations[NROT]
        cpu_lsurf = np.zeros_like(self.im_lsurf.array)
        disvis.libdisvis.rotate_image3d(self.im_lsurf.array, self.vlength, np.linalg.inv(rotmat), self.im_center, cpu_lsurf)

        cpu_clashvol = numpy.fft.irfftn(numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rcore.array), s=self.shape)

        gpu_rcore = cl_array.to_device(self.queue, np.asarray(self.rcore.array, dtype=np.float32))
        gpu_im_lsurf = cl.image_from_array(self.queue.context, np.asarray(self.im_lsurf.array, dtype=np.float32))
        gpu_lsurf = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

        self.kernels.rotate_image3d(self.queue, self.sampler, gpu_im_lsurf, rotmat, gpu_lsurf, self.im_center)

        gpu_ft_lsurf = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
        gpu_ft_rcore = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
        gpu_ft_clashvol = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
        gpu_clashvol = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

        self.kernels.rfftn(self.queue, gpu_rcore, gpu_ft_rcore)
        self.kernels.rfftn(self.queue, gpu_lsurf, gpu_ft_lsurf)
        self.kernels.c_conj_multiply(self.queue, gpu_ft_lsurf, gpu_ft_rcore, gpu_ft_clashvol)
        self.kernels.irfftn(self.queue, gpu_ft_clashvol, gpu_clashvol)

        self.assertTrue(np.allclose(cpu_clashvol, gpu_clashvol.get(), atol=0.8))
Example #20
0
    def test_rotate_template_mask(self):
        shape = (5, 5, 5)
        template = np.zeros(shape, dtype=np.float32)
        template[2, 2, 1:4] = 1
        template[2, 1:4, 2] = 1
        rotmat = np.asarray([1, 0, 0, 0, 1, 0, 0, 0, 1] + [0] * 7,
                            dtype=np.float32)

        self.queue.finish()
        cl_template = cl.image_from_array(self.queue.context, template)
        cl_out = cl_array.to_device(self.queue,
                                    np.zeros(shape, dtype=np.float32))
        center = np.asarray([2, 2, 2, 0], dtype=np.float32)
        shape = np.asarray([5, 5, 5, 125], dtype=np.int32)

        self.k.rotate_template(self.queue, (125, ), None, self.s_linear,
                               cl_template, rotmat, cl_out.data, center, shape)

        answer = np.zeros((5, 5, 5), dtype=np.float32)
        answer[0, 0, :2] = 1
        answer[0, 0, -1] = 1
        answer[0, :2, 0] = 1
        answer[0, -1, 0] = 1

        self.assertTrue(np.allclose(cl_out.get(), answer))
	def get_color(self, img):
		# OpenCL only supports RGBA images, not RGB, so add an alpha channel
		src = np.array(img.convert('RGBA'))
		src.shape = w, h, _ = img.width, img.height, 4

		w = int(w * self.SCALE_FACTOR)
		h = int(h * self.SCALE_FACTOR)

		local_size = self.max_work_item_sizes
		global_size = (math.ceil(h / local_size[0]), math.ceil(w / local_size[1]))

		total_work_groups = global_size[0] * global_size[1]

		mf = cl.mem_flags
		src_buf = cl.image_from_array(self.ctx, src, 4, norm_int=True)

		out = np.zeros(4 * total_work_groups, dtype=np.int32)
		out_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, size=out.itemsize * 4 * total_work_groups)

		kernel = self.prg.get_color
		kernel.set_scalar_arg_dtypes([None, None, np.uint32, np.uint32])
		kernel(self.queue, global_size, local_size, src_buf, out_buf, w, h, g_times_l=True)

		cl.enqueue_copy(self.queue, dest=out, src=out_buf, is_blocking=True)

		# this sum takes .1 ms at 3440x1440, don't even bother OpenCL-ifying it
		resized_out = np.reshape(out, (out.shape[0] / 4, 4))
		summed_out = np.sum(resized_out, axis=0)

		avg_out = (summed_out / summed_out[3])[:3].astype(int)

		return avg_out
Example #22
0
    def __call__(self, ctx, src, kernel):
        self.build(ctx)
        kernel = np.array(kernel, copy=False, dtype=np.float32)
        halflen = kernel.shape[0] / 2
        kernelf = kernel.flatten()
        kernelf_buf = cl.Buffer(self.ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=kernelf)

        src_padded = np.zeros((src.shape[0]+2*halflen, src.shape[1]+2*halflen, 4), dtype=src.dtype)
        src_padded[halflen:-halflen,halflen:-halflen,:src.shape[2]] = src[:,:,:src.shape[2]]

        src_padded[halflen:-halflen,:halflen,:src.shape[2]] = src_padded[halflen:-halflen,halflen:halflen*2,:src.shape[2]][:,::-1]
        src_padded[halflen:-halflen,-halflen:,:src.shape[2]] = src_padded[halflen:-halflen,-halflen*2:-halflen,:src.shape[2]][:,::-1]

        src_padded[:halflen,:,:src.shape[2]] = src_padded[halflen:halflen*2,:,:src.shape[2]][::-1,...]
        src_padded[-halflen:,:,:src.shape[2]] = src_padded[-halflen*2:-halflen,:,:src.shape[2]][::-1,...]

        norm = np.issubdtype(src.dtype, np.integer)
        src_buf = cl.image_from_array(self.ctx, src_padded, 4, norm_int=norm)
        dest = np.zeros((src.shape[0], src.shape[1], 4), dtype=src.dtype)
        dest_buf = init_image(self.ctx, dest, 4, mode="w", norm_int=norm)

        queue = cl.CommandQueue(self.ctx)
        self.prg.convolve2d_naive(queue, (dest.shape[1], dest.shape[0]), None, src_buf, dest_buf, kernelf_buf, np.int32(kernel.shape[0]))
        cl.enqueue_copy(queue, dest, dest_buf, origin=(0, 0), region=(src.shape[1], src.shape[0])).wait()

        # src_buf.release()
        # dest_buf.release()
        # kernelf_buf.release()
        return dest[:,:,:src.shape[2]]
Example #23
0
    def _gpu_init(self):
        """Method to initialize all the data for GPU-accelerate search"""

        self.gpu_data = {}
        g = self.gpu_data
        d = self.data
        q = self.queue

        # move data to the GPU. All should be float32, as these is the native
        # lenght for GPUs
        g['rcore'] = cl_array.to_device(q, float32array(d['rcore'].array))
        g['rsurf'] = cl_array.to_device(q, float32array(d['rsurf'].array))
        # Make the scanning chain object an Image, as this is faster to rotate
        g['im_lsurf'] = cl.image_from_array(q.context, float32array(d['lsurf'].array))
        g['sampler'] = cl.Sampler(q.context, False, cl.addressing_mode.CLAMP,
                                  cl.filter_mode.LINEAR)

        if self.distance_restraints:
            g['restraints'] = cl_array.to_device(q, float32array(d['restraints']))

        # Allocate arrays on the GPU
        g['lsurf'] = cl_array.zeros_like(g['rcore'])
        g['clashvol'] = cl_array.zeros_like(g['rcore'])
        g['intervol'] = cl_array.zeros_like(g['rcore'])
        g['interspace'] = cl_array.zeros(q, d['shape'], dtype=np.int32)
        g['restspace'] = cl_array.zeros_like(g['interspace'])
        g['access_interspace'] = cl_array.zeros_like(g['interspace'])
        g['best_access_interspace'] = cl_array.zeros_like(g['interspace'])

        # arrays for counting
        # Reductions are typically tedious on GPU, and we need to define the
        # workgroupsize to allocate the correct amount of data
        WORKGROUPSIZE = 32
        nsubhists = int(np.ceil(g['rcore'].size/WORKGROUPSIZE))
        g['subhists'] = cl_array.zeros(q, (nsubhists, d['nrestraints'] + 1), dtype=np.float32)
        g['viol_counter'] = cl_array.zeros(q, (nsubhists, d['nrestraints'], d['nrestraints']), dtype=np.float32)

        # complex arrays
        g['ft_shape'] = list(d['shape'])
        g['ft_shape'][0] = d['shape'][0]//2 + 1
        g['ft_rcore'] = cl_array.zeros(q, g['ft_shape'], dtype=np.complex64)
        g['ft_rsurf'] = cl_array.zeros_like(g['ft_rcore'])
        g['ft_lsurf'] = cl_array.zeros_like(g['ft_rcore'])
        g['ft_clashvol'] = cl_array.zeros_like(g['ft_rcore'])
        g['ft_intervol'] = cl_array.zeros_like(g['ft_rcore'])

        # other miscellanious data
        g['nrot'] = d['nrot']
        g['max_clash'] = d['max_clash']
        g['min_interaction'] = d['min_interaction']

        # kernels
        g['k'] = Kernels(q.context)
        g['k'].rfftn = pyclfft.RFFTn(q.context, d['shape'])
        g['k'].irfftn = pyclfft.iRFFTn(q.context, d['shape'])

        # initial calculations
        g['k'].rfftn(q, g['rcore'], g['ft_rcore'])
        g['k'].rfftn(q, g['rsurf'], g['ft_rsurf'])
Example #24
0
    def frame_preprocessing(self, lower_bound, upper_bound):
        # *Load and convert source image
        frame = np.array(self.frame)

        # *Set properties
        h = frame.shape[0]
        w = frame.shape[1]
        mask = np.zeros((1, 2), cl.cltypes.float4)
        mask[0, 0] = (lower_bound)  # Lower bound
        mask[0, 1] = (upper_bound)  # Upper bound

        # *Buffors
        frame_buf = cl.image_from_array(GPUSetup.context, frame, 4)
        fmt = cl.ImageFormat(cl.channel_order.RGBA,
                             cl.channel_type.UNSIGNED_INT8)
        dest_buf = cl.Image(GPUSetup.context,
                            cl.mem_flags.WRITE_ONLY,
                            fmt,
                            shape=(w, h))

        # *RGB to HSV
        GPUSetup.program.rgb2hsv(GPUSetup.queue, (w, h), None, frame_buf,
                                 dest_buf)
        self.hsv = np.empty_like(frame)
        cl.enqueue_copy(GPUSetup.queue,
                        self.hsv,
                        dest_buf,
                        origin=(0, 0),
                        region=(w, h))

        # *Apply mask
        frame_buf = cl.image_from_array(GPUSetup.context, self.hsv, 4)
        mask_buf = cl.Buffer(GPUSetup.context,
                             cl.mem_flags.READ_ONLY
                             | cl.mem_flags.COPY_HOST_PTR,
                             hostbuf=mask)
        GPUSetup.program.hsv_mask(GPUSetup.queue, (w, h), None, frame_buf,
                                  mask_buf, dest_buf)
        self.after_mask = np.empty_like(frame)
        cl.enqueue_copy(GPUSetup.queue,
                        self.after_mask,
                        dest_buf,
                        origin=(0, 0),
                        region=(w, h))
        return self.after_mask
Example #25
0
    def test_touch(self):

        MAX_CLASH = 100 + 0.9
        MIN_INTER = 300 + 0.9

        NROT = np.random.randint(self.rotations.shape[0] + 1)
        rotmat = self.rotations[0]
        cpu_lsurf = np.zeros_like(self.im_lsurf.array)
        disvis.libdisvis.rotate_image3d(self.im_lsurf.array, self.vlength, np.linalg.inv(rotmat), self.im_center, cpu_lsurf)

        cpu_clashvol = numpy.fft.irfftn(numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rcore.array))

        gpu_rcore = cl_array.to_device(self.queue, np.asarray(self.rcore.array, dtype=np.float32))
        gpu_im_lsurf = cl.image_from_array(self.queue.context, np.asarray(self.im_lsurf.array, dtype=np.float32))
        gpu_lsurf = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

        self.kernels.rotate_image3d(self.queue, self.sampler, gpu_im_lsurf, rotmat, gpu_lsurf, self.im_center)

        gpu_ft_lsurf = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
        gpu_ft_rcore = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
        gpu_ft_clashvol = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
        gpu_clashvol = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

        self.kernels.rfftn(self.queue, gpu_rcore, gpu_ft_rcore)
        self.kernels.rfftn(self.queue, gpu_lsurf, gpu_ft_lsurf)
        self.kernels.c_conj_multiply(self.queue, gpu_ft_lsurf, gpu_ft_rcore, gpu_ft_clashvol)
        self.kernels.irfftn(self.queue, gpu_ft_clashvol, gpu_clashvol)
        
        cpu_intervol = numpy.fft.irfftn(numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rsurf.array))

        gpu_rsurf = cl_array.to_device(self.queue, np.asarray(self.rsurf.array, dtype=np.float32))

        gpu_ft_rsurf = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
        gpu_ft_intervol = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
        gpu_intervol = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

        cpu_interspace = np.zeros(self.shape, dtype=np.int32)
        gpu_interspace = cl_array.zeros(self.queue, self.shape, dtype=np.int32)

        self.kernels.rfftn(self.queue, gpu_rsurf, gpu_ft_rsurf)
        self.kernels.rfftn(self.queue, gpu_lsurf, gpu_ft_lsurf)
        self.kernels.c_conj_multiply(self.queue, gpu_ft_lsurf, gpu_ft_rsurf, gpu_ft_intervol)
        self.kernels.irfftn(self.queue, gpu_ft_intervol, gpu_intervol)

        self.kernels.touch(self.queue, gpu_clashvol, MAX_CLASH, gpu_intervol, MIN_INTER, gpu_interspace)

        np.logical_and(cpu_clashvol < MAX_CLASH, cpu_intervol > MIN_INTER, cpu_interspace)

        disvis.volume.Volume(cpu_interspace, self.im_lsurf.voxelspacing, self.im_lsurf.origin).tofile('cpu_interspace.mrc')
        disvis.volume.Volume(gpu_interspace.get(), self.im_lsurf.voxelspacing, self.im_lsurf.origin).tofile('gpu_interspace.mrc')
        disvis.volume.Volume(cpu_interspace - gpu_interspace.get(), self.im_lsurf.voxelspacing, self.im_lsurf.origin).tofile('diff.mrc')
        print()
        print(cpu_interspace.sum(), gpu_interspace.get().sum())
        print(np.abs(cpu_interspace - gpu_interspace.get()).sum())
                           

        self.assertTrue(np.allclose(gpu_interspace.get(), cpu_interspace))
Example #26
0
def _get_image_buffer(image):
    """
    Create the buffer object for a image
    :param image: PIL image object
    :return: CL buffer object
    """
    image = image.convert("RGBA")
    image = np.array(image)
    return cl.image_from_array(_context, image, num_channels=4, mode="r", norm_int=False)
Example #27
0
    def __call__(self, ctx, ix, iy, rx, ry, sw, sh, ez, ex, ey, levels,
                 halfres_eccentricity, contrast_sensitivity, decay_constant):
        self.build(ctx)

        w = 2 * ix
        h = 2 * iy

        assert levels == 6
        var = np.array(
            [0.849, 0.4245, 0.21225, 0.106125, 0.0530625, 0.02653125],
            dtype=np.float32)
        horizontal_degree = subtended_angle(ctx, [0], [ry], [2 * rx], [ry], rx,
                                            ry, sw, sh, [ez], [ex], [ey])[0]
        freq = 0.5 / (horizontal_degree / (2 * rx))

        critical_eccentricity = [0.0]
        for l in xrange(levels):
            ecc = halfres_eccentricity * ((np.log(1 / contrast_sensitivity) *
                                           (1 << l) /
                                           (decay_constant * freq)) - 1)
            if ecc > 90.0: ecc = 90.0
            critical_eccentricity.append(ecc)
        critical_eccentricity.append(90.0)
        critical_eccentricity = np.array(critical_eccentricity,
                                         dtype=np.float32)

        ce_buf = cl.Buffer(self.ctx,
                           cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                           hostbuf=critical_eccentricity)
        var_buf = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_ONLY
                            | cl.mem_flags.COPY_HOST_PTR,
                            hostbuf=var)
        dest = np.zeros((h, w, 4), dtype=np.float32)
        dest_buf = cl.image_from_array(self.ctx, dest, 4, mode="w")

        queue = cl.CommandQueue(self.ctx)
        self.prg.blendmap(queue, (dest.shape[1], dest.shape[0]), None,
                          ce_buf, var_buf, np.float32(ix), np.float32(iy),
                          np.float32(w), np.float32(h), np.float32(2 * sw),
                          np.float32(2 * sh), np.float32(ez), np.float32(ex),
                          np.float32(ey), np.uint32(levels), dest_buf)
        cl.enqueue_copy(queue,
                        dest,
                        dest_buf,
                        origin=(0, 0),
                        region=(dest.shape[1], dest.shape[0])).wait()

        dest = dest.copy()
        dest_buf.release()
        ce_buf.release()
        var_buf.release()

        return critical_eccentricity, dest
Example #28
0
    def cl_load_data(self, population, world):
        mf = cl.mem_flags


        out = cl.Buffer(self.ctx, mf.WRITE_ONLY, population.nbytes)

        population_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=population)

#        world_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=world.flatten())
        world_cl = cl.image_from_array(self.ctx, world, mode="r")
        
        return population_cl, world_cl, out
Example #29
0
  def __init__(self,volumeNode,contextPreference='GPU',renderSize=(512,512)):
    self.volumeNode = volumeNode
    self.volumeArray = slicer.util.array(self.volumeNode.GetID())
    self.renderSize = renderSize

    try:
      import pyopencl
      import numpy
    except ImportError:
      raise "No OpenCL for you!\nInstall pyopencl in slicer's python installation."

    import os
    os.environ['PYOPENCL_COMPILER_OUTPUT'] = '1'

    self.ctx = None
    for platform in pyopencl.get_platforms():
        for device in platform.get_devices():
            if pyopencl.device_type.to_string(device.type) == contextPreference:
               self.ctx = pyopencl.Context([device])
               break;

    if not self.ctx:
      self.ctx = pyopencl.create_some_context()
    self.queue = pyopencl.CommandQueue(self.ctx)

    inPath = os.path.dirname(slicer.modules.rendercl.path) + "/Render.cl.in"
    fp = open(inPath)
    sourceIn = fp.read()
    fp.close()

    source = sourceIn % { 
        'rayStepSize' : '0.01f',
        'rayMaxSteps' : '500',
        }
    self.prg = pyopencl.Program(self.ctx, source).build()

    # create a 3d image from the volume
    num_channels = 1
    self.volumeImage_dev = pyopencl.image_from_array(self.ctx, self.volumeArray, num_channels)

    # create a 2d array for the render buffer
    self.renderArray = numpy.zeros(self.renderSize,dtype=numpy.dtype('uint32'))
    self.renderArray_dev = pyopencl.array.to_device(self.queue, self.renderArray)

    self.volumeSampler = pyopencl.Sampler(self.ctx,False,
                              pyopencl.addressing_mode.REPEAT,
                              pyopencl.filter_mode.LINEAR)

    # TODO make 2D image of transfer function

    self.transferFunctionSampler = pyopencl.Sampler(self.ctx,False,
                              pyopencl.addressing_mode.REPEAT,
                              pyopencl.filter_mode.LINEAR)
Example #30
0
    def __call__(self, ctx, src, kernel1, kernel2):
        self.build(ctx)
        kernel1 = np.array(kernel1, copy=False, dtype=np.float32)
        kernel2 = np.array(kernel2, copy=False, dtype=np.float32)
        halflen = kernel1.shape[0] / 2
        kernel1_buf = cl.Buffer(self.ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=kernel1)
        kernel2_buf = cl.Buffer(self.ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=kernel2)

        inshape = src.shape
        src = np.asarray(src)
        dims = len(src.shape)
        assert dims > 1
        if dims == 2:
            assert src.shape[1] <= 4
            src = src.reshape(src.shape[0],1,src.shape[1])
        else:
            assert src.shape[2] <= 4

        src_padded = np.zeros((src.shape[0]+2*halflen, 1, 4), dtype=src.dtype)
        src_padded[halflen:-halflen,:,:src.shape[2]] = src[:,:,:src.shape[2]]

        src_padded[:halflen,:,:] = src_padded[halflen:halflen*2,:,:][::-1,...]
        src_padded[-halflen:,:,:] = src_padded[-halflen*2:-halflen,:,:][::-1,...]

        norm = np.issubdtype(src.dtype, np.integer)
        src_buf = cl.image_from_array(self.ctx, src_padded, 4, norm_int=norm)
        dest = np.zeros((src.shape[0], src.shape[1], 4), dtype=src.dtype)
        dest_buf = cl.image_from_array(self.ctx, dest, 4, mode="w", norm_int=norm)

        queue = cl.CommandQueue(self.ctx)
        self.prg.convolve1d2_naive(queue, (dest.shape[1], dest.shape[0]), None, src_buf, dest_buf, kernel1_buf, kernel2_buf, np.int32(halflen))
        cl.enqueue_copy(queue, dest, dest_buf, origin=(0, 0), region=(src.shape[1], src.shape[0])).wait()

        dest = dest[:,:,:src.shape[2]].reshape(inshape)

        src_buf.release()
        dest_buf.release()
        kernel1_buf.release()
        kernel2_buf.release()
        return dest
Example #31
0
 def from_array(cls,arr, *args, **kwargs):
     ctx = get_device().context
     if not arr.ndim in [1,2,3,4]:
         raise ValueError("dimension of array wrong, should be 1...4 but is %s"%arr.ndim)        
     elif arr.ndim == 4:
         num_channels = arr.shape[-1]
     else:
         num_channels = None
     
     res =  pyopencl.image_from_array(ctx, arr,num_channels = num_channels,
                                          *args, **kwargs)
     res.dtype = arr.dtype
     return res
Example #32
0
    def test_rotate_grids_and_multiply(self):
        shape = (5, 5, 5)
        template = np.zeros(shape, dtype=np.float32)
        template[2, 2, 1:4] = 1
        template[2, 1:4, 2] = 1
        mask = template * 2
        np_out_template = np.zeros(shape, dtype=np.float32)
        np_out_template[0, 0, :2] = 1
        np_out_template[0, 0, -1] = 1
        np_out_template[0, :2, 0] = 1
        np_out_template[0, -1, 0] = 1
        np_out_mask = np_out_template * 2
        np_out_mask2 = np_out_mask**2

        cl_template = cl.image_from_array(self.ctx, template)
        cl_mask = cl.image_from_array(self.ctx, mask)
        cl_rotmat = np.asarray([1, 0, 0, 0, 1, 0, 0, 0, 1] + [0] * 7,
                               dtype=np.float32)
        cl_center = np.asarray([2, 2, 2, 0], dtype=np.float32)
        cl_shape = np.asarray([5, 5, 5, 125], dtype=np.int32)
        cl_radius = np.int32(2)

        cl_out_template = cl_array.to_device(self.queue,
                                             np.zeros(shape, dtype=np.float32))
        cl_out_mask = cl_array.to_device(self.queue,
                                         np.zeros(shape, dtype=np.float32))
        cl_out_mask2 = cl_array.to_device(self.queue,
                                          np.zeros(shape, dtype=np.float32))

        gws = tuple([int(2 * cl_radius + 1)] * 3)
        args = (cl_template, cl_mask, cl_rotmat, self.s_linear, self.s_nearest,
                cl_center, cl_shape, cl_radius, cl_out_template.data,
                cl_out_mask.data, cl_out_mask2.data)
        self.k.rotate_grids_and_multiply(self.queue, gws, None, *args)
        self.queue.finish()

        self.assertTrue(np.allclose(np_out_template, cl_out_template.get()))
        self.assertTrue(np.allclose(np_out_mask, cl_out_mask.get()))
        self.assertTrue(np.allclose(np_out_mask2, cl_out_mask2.get()))
Example #33
0
	def popCorn(self):
		mf = cl.mem_flags

		#initialize client side (CPU) arrays
		#self.a = numpy.array(range(10), dtype=numpy.float32)
		#self.b = numpy.array(range(10), dtype=numpy.float32)
		self.data = pyfits.getdata(os.path.join(self.path,self.file1))

		#create OpenCL buffers
		#self.imagebuf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.ra1)
		
		self.imagebuf = cl.image_from_array(self.ctx,self.data)
		
		self.dest_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, self.dec2.nbytes)
Example #34
0
    def __call__(self, ctx, pyramid, blendmap, x, y):
        self.build(ctx)

        norm = np.issubdtype(pyramid.dtype, np.integer)

        pyramid_buf = cl.image_from_array(self.ctx, pyramid, 4, mode="r", norm_int=norm)

        dest = np.zeros_like(pyramid[:,:,0,:],dtype=pyramid.dtype)
        dest_buf = init_image(self.ctx, dest, 4, mode="w", norm_int=norm)

        xoff = dest.shape[1] - x
        yoff = dest.shape[0] - y
        blendmap_buf = cl.image_from_array(self.ctx, blendmap[yoff:yoff+dest.shape[0],xoff:xoff+dest.shape[1],:].copy(), 4, mode="r")

        queue = cl.CommandQueue(self.ctx)
        self.prg.blend(queue, (dest.shape[1], dest.shape[0]), None,
                       pyramid_buf, blendmap_buf, dest_buf)
        cl.enqueue_copy(queue, dest, dest_buf, origin=(0, 0), region=(dest.shape[1], dest.shape[0])).wait()

        dest_buf.release()
        pyramid_buf.release()
        blendmap_buf.release()
        return dest
Example #35
0
 def __call__(self, ctx, src2, kernel):
     if self.ctx != ctx:
         self.ctx = ctx
         self.prg = cl.Program(self.ctx, pkg_resources.resource_string(__name__, "convolve2d.cl")).build()
     src2 = np.asarray(src2)
     src = np.zeros((src2.shape[0], src2.shape[1], 4),dtype=src2.dtype)
     src[:,:,0:src2.shape[2]] = src2[:,:,0:src2.shape[2]]
     norm = np.issubdtype(src.dtype, np.integer)
     src_buf = cl.image_from_array(self.ctx, src, 4, norm_int=norm)
     dest_buf = cl.image_from_array(self.ctx, src, 4, mode="w", norm_int=norm)
     dest = np.empty_like(src)
     kernel = np.array(kernel, dtype=np.float32)
     kernelf = kernel.flatten()
     kernelf_buf = cl.Buffer(self.ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=kernelf)
     halflen = (kernelf.shape[0]>>1)
     queue = cl.CommandQueue(self.ctx)
     self.prg.convolve2d_local(queue, (src.shape[1]-halflen, src.shape[0]-halflen), None, src_buf, dest_buf, kernelf_buf, np.int_(kernelf.shape[0]))
     cl.enqueue_copy(queue, dest, dest_buf, origin=(0, 0), region=(src.shape[1], src.shape[0])).wait()
     dest = dest[:,:,0:src2.shape[2]].copy()
     src_buf.release()
     dest_buf.release()
     kernelf_buf.release()
     return dest
Example #36
0
    def test_rotate_image3d(self):
        # CPU
        NROT = np.random.randint(self.rotations.shape[0] + 1)
        rotmat = self.rotations[NROT]

        #print(rotmat)

        cpu_lsurf = np.zeros_like(self.im_lsurf.array)
        disvis.libdisvis.rotate_image3d(self.im_lsurf.array, self.vlength, np.linalg.inv(rotmat), self.im_center, cpu_lsurf)

        gpu_im_lsurf = cl.image_from_array(self.queue.context, np.asarray(self.im_lsurf.array, dtype=np.float32))
        gpu_lsurf = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
        self.kernels.rotate_image3d(self.queue, self.sampler, gpu_im_lsurf, rotmat, gpu_lsurf, self.im_center)

        self.assertTrue(np.allclose(cpu_lsurf, gpu_lsurf.get(), atol=0.01))
Example #37
0
    def __call__(self, ctx, pyramid, blendmap, x, y):
        self.build(ctx)

        norm = np.issubdtype(pyramid.dtype, np.integer)

        pyramid_buf = cl.image_from_array(self.ctx,
                                          pyramid,
                                          4,
                                          mode="r",
                                          norm_int=norm)

        dest = np.zeros_like(pyramid[:, :, 0, :], dtype=pyramid.dtype)
        dest_buf = init_image(self.ctx, dest, 4, mode="w", norm_int=norm)

        xoff = dest.shape[1] - x
        yoff = dest.shape[0] - y
        blendmap_buf = cl.image_from_array(
            self.ctx,
            blendmap[yoff:yoff + dest.shape[0],
                     xoff:xoff + dest.shape[1], :].copy(),
            4,
            mode="r")

        queue = cl.CommandQueue(self.ctx)
        self.prg.blend(queue, (dest.shape[1], dest.shape[0]), None,
                       pyramid_buf, blendmap_buf, dest_buf)
        cl.enqueue_copy(queue,
                        dest,
                        dest_buf,
                        origin=(0, 0),
                        region=(dest.shape[1], dest.shape[0])).wait()

        dest_buf.release()
        pyramid_buf.release()
        blendmap_buf.release()
        return dest
Example #38
0
    def test_rotate_grids_and_multiply(self):
        shape = (5, 5, 5)
        template = np.zeros(shape, dtype=np.float32)
        template[2, 2, 1:4] = 1
        template[2, 1:4, 2] = 1
        mask = template * 2
        np_out_template = np.zeros(shape, dtype=np.float32)
        np_out_template[0, 0, :2] = 1
        np_out_template[0, 0, -1] = 1
        np_out_template[0, :2, 0] = 1
        np_out_template[0, -1, 0] = 1
        np_out_mask = np_out_template * 2 
        np_out_mask2 = np_out_mask ** 2


        cl_template = cl.image_from_array(self.ctx, template)
        cl_mask = cl.image_from_array(self.ctx, mask)
        cl_rotmat = np.asarray([1, 0, 0, 0, 1, 0, 0, 0, 1] + [0] * 7, dtype=np.float32)
        cl_center = np.asarray([2, 2, 2, 0], dtype=np.float32)
        cl_shape = np.asarray([5, 5, 5, 125], dtype=np.int32)
        cl_radius = np.int32(2)

        cl_out_template = cl_array.to_device(self.queue, np.zeros(shape, dtype=np.float32))
        cl_out_mask = cl_array.to_device(self.queue, np.zeros(shape, dtype=np.float32))
        cl_out_mask2 = cl_array.to_device(self.queue, np.zeros(shape, dtype=np.float32))

        gws = tuple([int(2 * cl_radius + 1)] * 3)
        args = (cl_template, cl_mask, cl_rotmat, self.s_linear, self.s_nearest,
                cl_center, cl_shape, cl_radius, cl_out_template.data, cl_out_mask.data,
                cl_out_mask2.data)
        self.k.rotate_grids_and_multiply(self.queue, gws, None, *args)
        self.queue.finish()

        self.assertTrue(np.allclose(np_out_template, cl_out_template.get()))
        self.assertTrue(np.allclose(np_out_mask, cl_out_mask.get()))
        self.assertTrue(np.allclose(np_out_mask2, cl_out_mask2.get()))
def cl_is_grey(p_rgb, p_cl_context: cl.Context, p_cl_queue: cl.CommandQueue,
               p_cl_program: cl.Program) -> bool:
    """Check if an image is grey using OpenCL"""
    pixels = numpy.array(p_rgb)
    dev_buf = cl.image_from_array(p_cl_context, pixels, 4)

    color = numpy.uint32(0)
    dev_color = cl.Buffer(p_cl_context,
                          cl.mem_flags.COPY_HOST_PTR,
                          hostbuf=color)

    p_cl_program.isGrey(p_cl_queue, (pixels.shape[0], pixels.shape[1]), None,
                        dev_buf, dev_color)

    nb_colors = numpy.empty_like(color)
    cl.enqueue_copy(p_cl_queue, nb_colors, dev_color)
    return bool(nb_colors == 0)
Example #40
0
    def __call__(self, ctx, src, kernel):
        self.build(ctx)
        kernel = np.array(kernel, copy=False, dtype=np.float32)
        halflen = kernel.shape[0] / 2
        kernelf = kernel.flatten()
        kernelf_buf = cl.Buffer(self.ctx,
                                cl.mem_flags.READ_ONLY
                                | cl.mem_flags.COPY_HOST_PTR,
                                hostbuf=kernelf)

        src_padded = np.zeros(
            (src.shape[0] + 2 * halflen, src.shape[1] + 2 * halflen, 4),
            dtype=src.dtype)
        src_padded[halflen:-halflen,
                   halflen:-halflen, :src.shape[2]] = src[:, :, :src.shape[2]]

        src_padded[halflen:-halflen, :halflen, :src.shape[2]] = src_padded[
            halflen:-halflen, halflen:halflen * 2, :src.shape[2]][:, ::-1]
        src_padded[halflen:-halflen, -halflen:, :src.shape[2]] = src_padded[
            halflen:-halflen, -halflen * 2:-halflen, :src.shape[2]][:, ::-1]

        src_padded[:halflen, :, :src.shape[2]] = src_padded[
            halflen:halflen * 2, :, :src.shape[2]][::-1, ...]
        src_padded[-halflen:, :, :src.shape[2]] = src_padded[
            -halflen * 2:-halflen, :, :src.shape[2]][::-1, ...]

        norm = np.issubdtype(src.dtype, np.integer)
        src_buf = cl.image_from_array(self.ctx, src_padded, 4, norm_int=norm)
        dest = np.zeros((src.shape[0], src.shape[1], 4), dtype=src.dtype)
        dest_buf = init_image(self.ctx, dest, 4, mode="w", norm_int=norm)

        queue = cl.CommandQueue(self.ctx)
        self.prg.convolve2d_naive(queue, (dest.shape[1], dest.shape[0]), None,
                                  src_buf, dest_buf, kernelf_buf,
                                  np.int32(kernel.shape[0]))
        cl.enqueue_copy(queue,
                        dest,
                        dest_buf,
                        origin=(0, 0),
                        region=(src.shape[1], src.shape[0])).wait()

        # src_buf.release()
        # dest_buf.release()
        # kernelf_buf.release()
        return dest[:, :, :src.shape[2]]
Example #41
0
    def from_array(cls, arr, *args, **kwargs):
        ctx = get_device().context
        if not arr.ndim in [1, 2, 3, 4]:
            raise ValueError(
                "dimension of array wrong, should be 1...4 but is %s" %
                arr.ndim)
        elif arr.ndim == 4:
            num_channels = arr.shape[-1]
        else:
            num_channels = None

        res = pyopencl.image_from_array(ctx,
                                        arr,
                                        num_channels=num_channels,
                                        *args,
                                        **kwargs)
        res.dtype = arr.dtype
        return res
def LoadImage(context, fileName):
    im = Image.open(fileName)
    img = np.array(im)
    IMG1 = scale_img(img, 8)
    im = Image.fromarray(IMG1)
    # Make sure the image is RGBA formatted
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    IMG1 = np.array(im)
    if len(IMG1.shape) > 2:
        nchannels = IMG1.shape[-1]
    else:
        nchannels = None
    t0 = time.clock()
    clImage = cl.image_from_array(context, IMG1, num_channels=nchannels,
                                  mode="r", norm_int=False)
    t1 = time.clock()
    print t1 - t0, " Load to GPU..."
    return clImage, im.size, IMG1
Example #43
0
    def parallel_prediction_errors(self, image):
        """ Get the MILC prediction errors for a 3D image by means of OpenCL accelerated computation

            Keyword arguments:
            image --  a 3D numpy array (bitmap image)

            Return:
            a 3D numpy array of the same shape of "image", containing the prediction errors
        """

        mf = cl.mem_flags
        # Define the image format for the prediction errors
        err_format = cl.ImageFormat(channel_order=cl.channel_order.R,
                                    channel_type=DataType.CL_ERR.value)

        # Define the input image from the numpy 3D array
        source_image = cl.image_from_array(self.ctx, image)

        original_shape = numpy.shape(image)
        cl_shape = list(
            reversed(original_shape))  # inverted shape (pyOpenCL bug?)

        # output image
        output_image = cl.Image(self.ctx,
                                mf.WRITE_ONLY,
                                err_format,
                                shape=cl_shape)

        # sampler. pixels out of range have a value of '0'
        sampler = cl.Sampler(self.ctx, False, cl.addressing_mode.CLAMP,
                             cl.filter_mode.NEAREST)

        # enqueue kernel
        self.program.image_test(self.queue, original_shape, None, source_image,
                                output_image, sampler)

        # read the resulting image into a numpy array
        output_data = numpy.empty(shape=cl_shape, dtype=DataType.ERR.value)
        cl.enqueue_read_image(self.queue, output_image, (0, 0, 0), cl_shape,
                              output_data)

        return output_data.reshape(original_shape)
Example #44
0
    def test_intervol(self):

        NROT = np.random.randint(self.rotations.shape[0] + 1)
        rotmat = self.rotations[NROT]
        cpu_lsurf = np.zeros_like(self.im_lsurf.array)
        disvis.libdisvis.rotate_image3d(self.im_lsurf.array, self.vlength,
                                        np.linalg.inv(rotmat), self.im_center,
                                        cpu_lsurf)

        cpu_intervol = numpy.fft.irfftn(numpy.fft.rfftn(cpu_lsurf).conj() *
                                        numpy.fft.rfftn(self.rsurf.array),
                                        s=self.shape)

        gpu_rsurf = cl_array.to_device(
            self.queue, np.asarray(self.rsurf.array, dtype=np.float32))
        gpu_im_lsurf = cl.image_from_array(
            self.queue.context,
            np.asarray(self.im_lsurf.array, dtype=np.float32))
        gpu_lsurf = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

        self.kernels.rotate_image3d(self.queue, self.sampler, gpu_im_lsurf,
                                    rotmat, gpu_lsurf, self.im_center)

        gpu_ft_lsurf = cl_array.zeros(self.queue,
                                      self.ft_shape,
                                      dtype=np.complex64)
        gpu_ft_rsurf = cl_array.zeros(self.queue,
                                      self.ft_shape,
                                      dtype=np.complex64)
        gpu_ft_intervol = cl_array.zeros(self.queue,
                                         self.ft_shape,
                                         dtype=np.complex64)
        gpu_intervol = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

        self.kernels.rfftn(self.queue, gpu_rsurf, gpu_ft_rsurf)
        self.kernels.rfftn(self.queue, gpu_lsurf, gpu_ft_lsurf)
        self.kernels.c_conj_multiply(self.queue, gpu_ft_lsurf, gpu_ft_rsurf,
                                     gpu_ft_intervol)
        self.kernels.irfftn(self.queue, gpu_ft_intervol, gpu_intervol)

        self.assertTrue(np.allclose(cpu_intervol, gpu_intervol.get(),
                                    atol=0.8))
Example #45
0
    def create_table(self, ctx, compile_options, nrow=200, ncol=1000):
        '''store the eos (ed, pr, T, s) in image2d_t table for fast
        linear interpolation,
        add some information to compile_options for EOS table'''
        import pyopencl as cl
        fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT)
        src = np.array(list(zip(self.cs2, self.pr, self.T, self.s)),
                       dtype=np.float32).reshape(nrow, ncol, 4)

        eos_table = cl.image_from_array(ctx, src, 4)
        compile_options.append(
            '-D EOS_ED_START={value}f'.format(value=self.ed_start))
        compile_options.append(
            '-D EOS_ED_STEP={value}f'.format(value=self.ed_step))
        compile_options.append(
            '-D EOS_NUM_ED={value}'.format(value=self.num_of_ed))
        compile_options.append('-D EOS_NUM_OF_ROWS=%s' % nrow)
        compile_options.append('-D EOS_NUM_OF_COLS=%s' % ncol)
        self.compile_options = compile_options
        return eos_table
Example #46
0
    def test_rotate_image3d(self):
        # CPU
        NROT = np.random.randint(self.rotations.shape[0] + 1)
        rotmat = self.rotations[NROT]

        #print(rotmat)

        cpu_lsurf = np.zeros_like(self.im_lsurf.array)
        disvis.libdisvis.rotate_image3d(self.im_lsurf.array, self.vlength,
                                        np.linalg.inv(rotmat), self.im_center,
                                        cpu_lsurf)

        gpu_im_lsurf = cl.image_from_array(
            self.queue.context,
            np.asarray(self.im_lsurf.array, dtype=np.float32))
        gpu_lsurf = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
        self.kernels.rotate_image3d(self.queue, self.sampler, gpu_im_lsurf,
                                    rotmat, gpu_lsurf, self.im_center)

        self.assertTrue(np.allclose(cpu_lsurf, gpu_lsurf.get(), atol=0.01))
Example #47
0
 def __init__(self):
     self.angle = 0.
     self.ch_angles = {
         "Key_UP": pi / 18.,
         "Key_Down": -pi / 18.,
         "Key_Right": -pi / 180.,
         "Key_Left": pi / 180.
     }
     ctx = create_some_context()
     in_img = lena()
     h, w = map(int32, in_img.shape[:2])
     # in pyopencl 2018.2.2 channel orders other than RGBA
     # cause segmentation fault
     i4 = zeros((h, w, 4), dtype=uint8)
     i4[:, :, 0] = in_img
     self.in_img_buf = image_from_array(ctx, i4, 4)
     fmt = ImageFormat(CHO.RGBA, CHANNEL.UNSIGNED_INT8)
     self.out_img_buf = Image(ctx, MEM.WRITE_ONLY, fmt, shape=(w, h))
     prg = Program(ctx, load_cl_text("rotation.cl")).build()
     self.params = (ctx, self.in_img_buf, self.out_img_buf, h, w, prg)
def main():
    CL_CODE = '''
    constant float R_weight = 0.6;
    constant float G_weight = 0.4;
    constant float B_weight = 0.8;
    constant float ALL_weight = 1.8;
    constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                                 CLK_ADDRESS_CLAMP |
                                 CLK_FILTER_NEAREST;

    kernel void gray(__read_only image2d_t src_img, __write_only image2d_t dst_img) {
        int x = get_global_id(0);
        int y = get_global_id(1);

        int2 coord = (int2)(x, y);
        uint4 pixel = read_imageui(src_img, sampler, coord);
        uint g = (uint)((pixel[0] * R_weight + pixel[1] * G_weight + pixel[2] * B_weight) / ALL_weight);
        pixel = g;
        pixel[3] = 255;
        write_imageui(dst_img, coord, pixel);
    }
    '''

    plf = [(cl.context_properties.PLATFORM, cl.get_platforms()[0])]
    ctx = cl.Context(dev_type=cl.device_type.GPU, properties=plf)
    prg = cl.Program(ctx, CL_CODE).build()
    queue = cl.CommandQueue(ctx)
    mf = cl.mem_flags

    src_raw = np.asarray(Image.open('res/tile-z16.png').convert("RGBA"))
    src_img = cl.image_from_array(ctx, src_raw, 4)
    (w, h, _) = src_raw.shape
    image_size = (w, h)

    fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)
    dst_img = cl.Image(ctx, mf.WRITE_ONLY, fmt, shape=image_size)
    dst_raw = np.empty_like(src_raw)

    prg.gray(queue, image_size, (1, 1), src_img, dst_img)
    cl.enqueue_copy(queue, dst_raw, dst_img, origin=(0, 0), region=image_size)
    Image.fromarray(dst_raw).show()
Example #49
0
    def __call__(self, ctx, ix, iy, rx, ry, sw, sh, ez, ex, ey, levels, halfres_eccentricity, contrast_sensitivity, decay_constant):
        self.build(ctx)

        w = 2*ix
        h = 2*iy

        assert levels==6
        var = np.array([0.849, 0.4245, 0.21225, 0.106125, 0.0530625, 0.02653125], dtype=np.float32)
        horizontal_degree = subtended_angle(ctx,[0],[ry],[2*rx],[ry],rx,ry,sw,sh,[ez],[ex],[ey])[0]
        freq = 0.5/(horizontal_degree/(2*rx))

        critical_eccentricity = [0.0]
        for l in xrange(levels):
            ecc = halfres_eccentricity * ( (np.log(1/contrast_sensitivity)*(1<<l)/(decay_constant*freq))-1 )
            if ecc > 90.0: ecc = 90.0
            critical_eccentricity.append(ecc)
        critical_eccentricity.append(90.0)
        critical_eccentricity = np.array(critical_eccentricity, dtype=np.float32)

        ce_buf = cl.Buffer(self.ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=critical_eccentricity)
        var_buf = cl.Buffer(self.ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=var)
        dest = np.zeros((h, w, 4), dtype=np.float32)
        dest_buf = cl.image_from_array(self.ctx, dest, 4, mode="w")

        queue = cl.CommandQueue(self.ctx)
        self.prg.blendmap(queue, (dest.shape[1], dest.shape[0]), None,
                          ce_buf, var_buf,
                          np.float32(ix), np.float32(iy),
                          np.float32(w), np.float32(h),
                          np.float32(2*sw), np.float32(2*sh),
                          np.float32(ez), np.float32(ex), np.float32(ey),
                          np.uint32(levels),
                          dest_buf)
        cl.enqueue_copy(queue, dest, dest_buf, origin=(0, 0), region=(dest.shape[1], dest.shape[0])).wait()

        dest = dest.copy()
        dest_buf.release()
        ce_buf.release()
        var_buf.release()

        return critical_eccentricity, dest
Example #50
0
    def convert(self, img):
        src = numpy.fromstring(img.bits().asstring(img.byteCount()),
                               dtype=numpy.uint8)
        src.shape = h, w, _ = img.height(), img.width(), 4

        mf = cl.mem_flags
        src_buf = cl.image_from_array(self.ctx, src, 4)
        fmt = cl.ImageFormat(cl.channel_order.RGBA,
                             cl.channel_type.UNSIGNED_INT8)
        dest_buf = cl.Image(self.ctx, mf.WRITE_ONLY, fmt, shape=(w, h))

        self.prg.convert(self.queue, (w, h), None, src_buf, dest_buf,
                         numpy.int32(w), numpy.int32(h))

        dest = numpy.empty_like(src)
        cl.enqueue_copy(self.queue,
                        dest,
                        dest_buf,
                        origin=(0, 0),
                        region=(w, h))
        return QtGui.QImage(str(dest.data), w, h, QtGui.QImage.Format_RGB32)
Example #51
0
    def test_rotate_template_mask(self):
        shape = (5, 5, 5)
        template = np.zeros(shape, dtype=np.float32)
        template[2, 2, 1:4] = 1
        template[2, 1:4, 2] = 1
        rotmat = np.asarray([1, 0, 0, 0, 1, 0, 0, 0, 1] + [0] * 7, dtype=np.float32)

        self.queue.finish()
        cl_template = cl.image_from_array(self.queue.context, template)
        cl_out = cl_array.to_device(self.queue, np.zeros(shape, dtype=np.float32))
        center = np.asarray([2, 2, 2, 0], dtype=np.float32)
        shape = np.asarray([5, 5, 5, 125], dtype=np.int32)

        self.k.rotate_template(self.queue, (125,), None, self.s_linear,
                cl_template, rotmat, cl_out.data, center, shape)

        answer = np.zeros((5, 5, 5), dtype=np.float32)
        answer[0, 0, :2] = 1
        answer[0, 0, -1] = 1
        answer[0, :2, 0] = 1
        answer[0, -1, 0] = 1

        self.assertTrue(np.allclose(cl_out.get(), answer))
Example #52
0
    def template_preprocesing(self):
        names = []
        for filename in glob.iglob(os.getcwd() + '/templates/*.png',
                                   recursive=True):
            names.append(filename)

        for name in sorted(names, key=self.sort_filenames):
            template = cv2.cvtColor(cv2.imread(name), cv2.COLOR_RGB2RGBA)
            self.templates.append(template)

            h = template.shape[0]
            w = template.shape[1]

            # *Buffors
            template_buf = cl.image_from_array(GPUSetup.context, template, 4)
            fmt = cl.ImageFormat(cl.channel_order.RGBA,
                                 cl.channel_type.UNSIGNED_INT8)
            dest_buf = cl.Image(GPUSetup.context,
                                cl.mem_flags.WRITE_ONLY,
                                fmt,
                                shape=(w, h))

            # *RGB to HSV
            GPUSetup.program.rgb2hsv(GPUSetup.queue, (w, h), None,
                                     template_buf, dest_buf)
            template_hsv = np.empty_like(template)
            cl.enqueue_copy(GPUSetup.queue,
                            template_hsv,
                            dest_buf,
                            origin=(0, 0),
                            region=(w, h))
            self.templates_hsv.append(template_hsv)

            # *Apply masks
            template_mask = self.clear_sign(template_hsv, template)
            self.templates_mask.append(template_mask)
Example #53
0
def to_cl_image_8192(ctx, values, fill=0):
    """
    Convert the given Nx3 array <values> to an RGBA image with a data type
    that is compatible to the datatype of <values>, with min(len(values),
    8192) values along its axis 1, and the necessary number of values along
    axis 0, such that the image is filled putting values[0] into result[0,
    0], values[1] into result[0, 1], etc. Excess image values and all
    values of the A channel are filled with the given <fill> value
    (defaults to 0).
    
    Note that, for whatever reason, OpenCL image axis 0 maps to array axis
    1 and vice versa (cf. help(cl._cl.Image)) -- which should not make a
    difference if the result of this function is used transparently.
    
    Return the resulting <pyopencl._cl.Image> instance.
    """
    assert values.shape[1] == 3    
    num_values = len(values)
    assert num_values > 0

    # Determine image dimensions
    img_dim_1 = np.minimum(8192, num_values)
    img_dim_0 = (int(num_values - 1) >> 13) + 1  # 2 ** 13 = 8192
    
    # Create appropriately sized Numpy array, then fill it
    values_2d = np.asarray(np.ones((img_dim_1 * img_dim_0, 4),
                                   dtype=values.dtype) * fill,
                           dtype=values.dtype)
    values_2d[:num_values, :3] = values
    values_2d = values_2d.reshape(img_dim_0, img_dim_1, 4)
    values_2d = np.swapaxes(values_2d, 0, 1)
    values_2d = np.require(values_2d, requirements=["A", "C"])
    
    # Create <cl._cl.Image> instance
    values_cl = cl.image_from_array(ctx, values_2d, num_channels=4, mode="r")
    return values_cl
Example #54
0
    def from_array(cls,arr, *args, **kwargs):

        ctx = get_device().context
        if not arr.ndim in [2,3,4]:
            raise ValueError("dimension of array wrong, should be 1...4 but is %s"%arr.ndim)
        elif arr.ndim == 4:
            num_channels = arr.shape[-1]
        else:
            num_channels = None

        if arr.dtype.type == np.complex64:
            num_channels = 2
            res =  OCLImage.empty(arr.shape,dtype = np.float32, num_channels=num_channels)
            res.write_array(arr)
            res.dtype = np.float32
        else:
            res =  cl.image_from_array(ctx, arr,num_channels = num_channels,
                                             *args, **kwargs)

            res.dtype = arr.dtype

        res.num_channels = num_channels

        return res
Example #55
0
def test_image_3d(ctx_factory):
    #test for image_from_array for 3d image of float2
    context = ctx_factory()

    device, = context.devices

    if not device.image_support:
        from pytest import skip
        skip("images not supported on %s" % device)

    if device.platform.vendor == "Intel(R) Corporation":
        from pytest import skip
        skip("images crashy on %s" % device)
    _skip_if_pocl(device.platform, 'pocl does not support CL_ADDRESS_CLAMP')

    prg = cl.Program(context, """
        __kernel void copy_image_plane(
          __global float2 *dest,
          __read_only image3d_t src,
          sampler_t samp,
          int stride0,
          int stride1)
        {
          int d0 = get_global_id(0);
          int d1 = get_global_id(1);
          int d2 = get_global_id(2);
          /*
          const sampler_t samp =
            CLK_NORMALIZED_COORDS_FALSE
            | CLK_ADDRESS_CLAMP
            | CLK_FILTER_NEAREST;
            */
          dest[d0*stride0 + d1*stride1 + d2] = read_imagef(
                src, samp, (float4)(d2, d1, d0, 0)).xy;
        }
        """).build()

    num_channels = 2
    shape = (3, 4, 2)
    a = np.random.random(shape + (num_channels,)).astype(np.float32)

    queue = cl.CommandQueue(context)
    try:
        a_img = cl.image_from_array(context, a, num_channels)
    except cl.RuntimeError:
        import sys
        exc = sys.exc_info()[1]
        if exc.code == cl.status_code.IMAGE_FORMAT_NOT_SUPPORTED:
            from pytest import skip
            skip("required image format not supported on %s" % device.name)
        else:
            raise

    a_dest = cl.Buffer(context, cl.mem_flags.READ_WRITE, a.nbytes)

    samp = cl.Sampler(context, False,
            cl.addressing_mode.CLAMP,
            cl.filter_mode.NEAREST)
    prg.copy_image_plane(queue, shape, None, a_dest, a_img, samp,
                         np.int32(a.strides[0]/a.itemsize/num_channels),
                         np.int32(a.strides[1]/a.itemsize/num_channels),
                         )

    a_result = np.empty_like(a)
    cl.enqueue_copy(queue, a_result, a_dest)

    good = la.norm(a_result - a) == 0
    if not good:
        if queue.device.type & cl.device_type.CPU:
            assert good, ("The image implementation on your CPU CL platform '%s' "
                    "returned bad values. This is bad, but common."
                    % queue.device.platform)
        else:
            assert good
Example #56
0
import cv2

def loadProgram(filename):
    with open(filename, 'r') as f:
        return "".join(f.readlines())

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
build_opts = "-I."
RGB2YCrCb = cl.Program(ctx, loadProgram("RGB2YCrCb.cl")).build(build_opts).RGB2YCrCb
YCrCb2RGB = cl.Program(ctx, loadProgram("YCrCb2RGB.cl")).build(build_opts).YCrCb2RGB

fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)

src = cv2.cvtColor(cv2.imread("PM5544_with_non-PAL_signals.png",cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGBA)
src_buf = cl.image_from_array(ctx, src, 4)
dest_buf = cl.Image(ctx, cl.mem_flags.WRITE_ONLY, fmt, shape=(src.shape[1],src.shape[0]))

RGB2YCrCb(queue, (src.shape[1],src.shape[0]), None, src_buf, dest_buf)

dest = np.empty_like(src)
cl.enqueue_copy(queue, dest, dest_buf, origin=(0, 0), region=(src.shape[1],src.shape[0]))

Y = cv2.merge((dest[:,:,0],dest[:,:,0],dest[:,:,0]))
Cr = cv2.merge((dest[:,:,1],dest[:,:,1],dest[:,:,1]))
Cb = cv2.merge((dest[:,:,2],dest[:,:,2],dest[:,:,2]))

src2_buf = cl.image_from_array(ctx, dest, 4)
dest2_buf = cl.Image(ctx, cl.mem_flags.WRITE_ONLY, fmt, shape=(src.shape[1],src.shape[0]))

YCrCb2RGB(queue, (src.shape[1],src.shape[0]), None, src2_buf, dest2_buf)
Example #57
0
def make_ref_args(kernel, impl_arg_info, queue, parameters):
    import pyopencl as cl
    import pyopencl.array as cl_array

    from loopy.kernel.data import ValueArg, GlobalArg, ImageArg, TemporaryVariable

    from pymbolic import evaluate

    ref_args = {}
    ref_arg_data = []

    for arg in impl_arg_info:
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            if arg.offset_for_name:
                continue

            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            ref_args[arg.name] = arg_value

            ref_arg_data.append(None)

        elif arg.arg_class is GlobalArg or arg.arg_class is ImageArg:
            if arg.shape is None or any(saxis is None for saxis in arg.shape):
                raise LoopyError("array '%s' needs known shape to use automatic "
                        "testing" % arg.name)

            shape = evaluate_shape(arg.unvec_shape, parameters)
            dtype = kernel_arg.dtype

            is_output = arg.base_name in kernel.get_written_variables()

            if arg.arg_class is ImageArg:
                storage_array = ary = cl_array.empty(
                        queue, shape, dtype, order="C")
                numpy_strides = None
                alloc_size = None
                strides = None
            else:
                strides = evaluate(arg.unvec_strides, parameters)

                from pytools import all
                assert all(s > 0 for s in strides)
                alloc_size = sum(astrd*(alen-1)
                        for alen, astrd in zip(shape, strides)) + 1

                if dtype is None:
                    raise LoopyError("dtype for argument '%s' is not yet "
                            "known. Perhaps you want to use "
                            "loopy.add_dtypes "
                            "or loopy.infer_argument_dtypes?"
                            % arg.name)

                itemsize = dtype.itemsize
                numpy_strides = [itemsize*s for s in strides]

                storage_array = cl_array.empty(queue, alloc_size, dtype)

            if is_output and arg.arg_class is ImageArg:
                raise LoopyError("write-mode images not supported in "
                        "automatic testing")

            fill_rand(storage_array)

            if arg.arg_class is ImageArg:
                # must be contiguous
                pre_run_ary = pre_run_storage_array = storage_array.copy()

                ref_args[arg.name] = cl.image_from_array(
                        queue.context, ary.get())
            else:
                pre_run_storage_array = storage_array.copy()

                ary = cl_array.as_strided(storage_array, shape, numpy_strides)
                pre_run_ary = cl_array.as_strided(
                        pre_run_storage_array, shape, numpy_strides)
                ref_args[arg.name] = ary

            ref_arg_data.append(
                    TestArgInfo(
                        name=arg.name,
                        ref_array=ary,
                        ref_storage_array=storage_array,

                        ref_pre_run_array=pre_run_ary,
                        ref_pre_run_storage_array=pre_run_storage_array,

                        ref_shape=shape,
                        ref_strides=strides,
                        ref_alloc_size=alloc_size,
                        ref_numpy_strides=numpy_strides,
                        needs_checking=is_output))

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type not understood")

    return ref_args, ref_arg_data
Example #58
0
def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters):
    import pyopencl as cl
    import pyopencl.array as cl_array

    from loopy.kernel.data import ValueArg, GlobalArg, ImageArg, TemporaryVariable

    from pymbolic import evaluate

    args = {}
    for arg, arg_desc in zip(impl_arg_info, ref_arg_data):
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            args[arg.name] = arg_value

        elif arg.arg_class is ImageArg:
            if arg.name in kernel.get_written_variables():
                raise NotImplementedError("write-mode images not supported in "
                        "automatic testing")

            shape = evaluate_shape(arg.unvec_shape, parameters)
            assert shape == arg_desc.ref_shape

            # must be contiguous
            args[arg.name] = cl.image_from_array(
                    queue.context, arg_desc.ref_pre_run_array.get())

        elif arg.arg_class is GlobalArg:
            shape = evaluate(arg.unvec_shape, parameters)
            strides = evaluate(arg.unvec_strides, parameters)

            dtype = kernel_arg.dtype
            itemsize = dtype.itemsize
            numpy_strides = [itemsize*s for s in strides]

            assert all(s > 0 for s in strides)
            alloc_size = sum(astrd*(alen-1)
                    for alen, astrd in zip(shape, strides)) + 1

            # use contiguous array to transfer to host
            host_ref_contig_array = arg_desc.ref_pre_run_storage_array.get()

            # use device shape/strides
            from pyopencl.compyte.array import as_strided
            host_ref_array = as_strided(host_ref_contig_array,
                    arg_desc.ref_shape, arg_desc.ref_numpy_strides)

            # flatten the thing
            host_ref_flat_array = host_ref_array.flatten()

            # create host array with test shape (but not strides)
            host_contig_array = np.empty(shape, dtype=dtype)

            common_len = min(
                    len(host_ref_flat_array),
                    len(host_contig_array.ravel()))
            host_contig_array.ravel()[:common_len] = \
                    host_ref_flat_array[:common_len]

            # create host array with test shape and storage layout
            host_storage_array = np.empty(alloc_size, dtype)
            host_array = as_strided(
                    host_storage_array, shape, numpy_strides)
            host_array[...] = host_contig_array

            host_contig_array = arg_desc.ref_storage_array.get()
            storage_array = cl_array.to_device(queue, host_storage_array)
            ary = cl_array.as_strided(storage_array, shape, numpy_strides)

            args[arg.name] = ary

            arg_desc.test_storage_array = storage_array
            arg_desc.test_array = ary
            arg_desc.test_shape = shape
            arg_desc.test_strides = strides
            arg_desc.test_numpy_strides = numpy_strides
            arg_desc.test_alloc_size = alloc_size

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type not understood")

    return args
Example #59
0
# Build program in the specified context using the kernel source code
prog = cl.Program(context, kernel_src)
try:
    prog.build(options=['-Werror', '-DSCALE={}'.format(SCALE_FACTOR)], devices=[dev], cache_dir=None)
except:
    print('Build log:')
    print(prog.get_build_info(dev, cl.program_build_info.LOG))
    raise

# Data and buffers
im_src = imread('input_car.png').astype(dtype=np.uint16)
shape_dst = (im_src.shape[0]*SCALE_FACTOR, im_src.shape[1]*SCALE_FACTOR)
im_dst = np.empty(shape=shape_dst, dtype=np.uint16)

src_buff = cl.image_from_array(context, im_src, mode='r')
dst_buff = cl.image_from_array(context, im_dst, mode='w')

# Enqueue kernel
# Note: Global indices is reversed due to OpenCL using column-major order when reading images
global_size = im_src.shape[::-1]
local_size = None

# __call__(queue, global_size, local_size, *args, global_offset=None, wait_for=None, g_times_l=False)
prog.interp(queue, global_size, local_size, src_buff, dst_buff)

# Enqueue command to copy from buffers to host memory
# Note: Region indices is reversed due to OpenCL using column-major order when reading images
cl.enqueue_copy(queue, dest=im_dst, src=dst_buff, is_blocking=True, origin=(0, 0), region=im_dst.shape[::-1])

# Plot images with built-in scaling disabled
Example #60
0
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

"""
import pyopencl,pyopencl.array
import numpy
ctx = pyopencl.create_some_context()
queue = pyopencl.CommandQueue(ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
x, y, z = numpy.ogrid[-10:10:0.05, -10:10:0.05, -10:10:0.05]
r=numpy.sqrt(x*x+y*y+z*z)
data = ((x * x - y * y + z * z) * numpy.exp(-r)).astype("float32")
gpu_vol = pyopencl.image_from_array(ctx, data, 1)
shape = (500, 500)
img = numpy.empty(shape,dtype=numpy.float32)
gpu_img = pyopencl.array.empty(queue, shape, numpy.float32)
prg = open("interpolation.cl").read()
sampler = pyopencl.Sampler(ctx,
                           True, # normalized coordinates
                           pyopencl.addressing_mode.CLAMP_TO_EDGE,
                           pyopencl.filter_mode.LINEAR)

prg = pyopencl.Program(ctx, prg).build()
n = pyopencl.array.to_device(queue, numpy.array([1, 1, 1], dtype=numpy.float32))
c = pyopencl.array.to_device(queue, numpy.array([0.5, 0.5, 0.5], dtype=numpy.float32))
prg.interpolate(queue, (512, 512), (16, 16), gpu_vol, sampler, gpu_img.data,
                numpy.int32(shape[1]), numpy.int32(shape[1]), c.data, n.data)
img = gpu_img.get()