Esempio n. 1
0
    def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
                 ctx=None, devicetype="all", platformid=None, deviceid=None,
                 profile=False
                 ):
        """Prepare the kernels and device arrays used by this solver.

        Every argument is forwarded unchanged to
        ``ReconstructionAlgorithm.__init__``; see that class for their meaning.
        """
        ReconstructionAlgorithm.__init__(
            self,
            sino_shape,
            slice_shape=slice_shape,
            axis_position=axis_position,
            angles=angles,
            ctx=ctx,
            devicetype=devicetype,
            platformid=platformid,
            deviceid=deviceid,
            profile=profile,
        )
        self.compute_preconditioners()

        # Linear-algebra helper built for arrays of the slice shape.
        self.linalg = LinAlg(self.backprojector.slice_shape, ctx=self.ctx)

        # Element-wise positivity constraint: a[i] <- max(a[i], 0).
        self.elwise_clamp = ElementwiseKernel(
            self.ctx,
            "float *a",
            "a[i] = max(a[i], 0.0f);",
        )

        # Element-wise projection of both float2 components onto
        # [-Lambda, Lambda] (the L-infinity ball of radius Lambda).
        self.elwise_proj_linf = ElementwiseKernel(
            self.ctx,
            "float2* a, float Lambda",
            "a[i].x = copysign(min(fabs(a[i].x), Lambda), a[i].x); "
            "a[i].y = copysign(min(fabs(a[i].y), Lambda), a[i].y);",
            "elwise_proj_linf",
        )

        # The gradient is evaluated once before reading cl_mem["d_gradient"].
        # NOTE(review): presumably this call is what allocates that buffer - confirm.
        self.linalg.gradient(self.d_x)
        gradient_buffer = self.linalg.cl_mem["d_gradient"]

        # Additional device arrays.
        self.d_p = parray.zeros_like(gradient_buffer)
        self.d_q = parray.zeros_like(self.d_data)
        self.d_g = self.linalg.d_image
        self.d_tmp = parray.zeros_like(self.d_x)

        extra_buffers = {
            "d_p": self.d_p,
            "d_q": self.d_q,
            "d_tmp": self.d_tmp,
        }
        self.add_to_cl_mem(extra_buffers)

        self.theta = 1.0
Esempio n. 2
0
    def test_adj_inplace(self):
        """Compare the inner products of ``symgrad.fwd`` and ``symgrad.adj``.

        ``a`` sums the inner product over components 0..2 and twice the inner
        product over components 3..5 of the forward output against
        ``self.symdivin``; ``b`` is the inner product of components 0..2 of
        ``self.symgradin`` against the negated adjoint output.
        """
        dev_grad_in = clarray.to_device(self.queue, self.symgradin)
        dev_div_in = clarray.to_device(self.queue, self.symdivin)

        dev_fwd_out = clarray.zeros_like(dev_div_in)
        dev_adj_out = clarray.zeros_like(dev_grad_in)

        # Attach the returned events to the output arrays.
        fwd_event = self.symgrad.fwd(dev_fwd_out, dev_grad_in)
        dev_fwd_out.add_event(fwd_event)
        adj_event = self.symgrad.adj(dev_adj_out, dev_div_in)
        dev_adj_out.add_event(adj_event)

        fwd_host = dev_fwd_out.get()
        adj_host = dev_adj_out.get()

        norm = self.symgradin.size
        first = slice(None, 3)
        second = slice(3, 6)

        lhs_first = np.vdot(
            fwd_host[..., first].flatten(),
            self.symdivin[..., first].flatten()) / norm * 4
        lhs_second = 2 * np.vdot(
            fwd_host[..., second].flatten(),
            self.symdivin[..., second].flatten()) / norm * 4
        a = lhs_first + lhs_second
        b = np.vdot(
            self.symgradin[..., first].flatten(),
            -adj_host[..., first].flatten()) / norm * 4

        mismatch = a - b
        print("Adjointness: %.2e +1j %.2e" % (mismatch.real, mismatch.imag))

        np.testing.assert_allclose(a, b, rtol=RTOL, atol=ATOL)
Esempio n. 3
0
    def _setupVariables(self, x, data):
        """Upload ``x`` and ``data`` and allocate the device work arrays.

        ``data`` is cast to ``self._DTYPE`` before upload. All arrays live on
        ``self._queue[0]``.

        Returns:
            tuple: ``(step_out, tmp_results, step_in, data)`` where the first
            three are dicts of device arrays and ``data`` is the uploaded data.
        """
        queue = self._queue[0]
        data = clarray.to_device(queue, data.astype(self._DTYPE))

        # Inputs of one step: current iterate, previous iterate and a copy.
        x_dev = clarray.to_device(queue, x)
        step_in = {
            "x": x_dev,
            "xold": clarray.to_device(queue, x),
            "xk": x_dev.copy(),
        }

        step_out = {"x": clarray.zeros_like(x_dev)}

        # Scratch arrays; the trailing axes of "gradx" (4) and "reg_norm" (2)
        # are appended to the shape of x.
        tmp_results = {
            "gradFx": x_dev.copy(),
            "DADA": clarray.zeros_like(x_dev),
            "DAd": clarray.zeros_like(x_dev),
            "d": data.copy(),
            "Ax": clarray.zeros_like(data),
            "temp_reg": clarray.zeros_like(x_dev),
            "gradx": clarray.zeros(
                queue, x_dev.shape + (4,), dtype=self._DTYPE
            ),
            "reg_norm": clarray.zeros(
                queue, x_dev.shape + (2,), dtype=self._DTYPE_real
            ),
            "reg": clarray.zeros(
                queue, x_dev.shape, dtype=self._DTYPE_real
            ),
        }
        return (step_out, tmp_results, step_in, data)
Esempio n. 4
0
    def update(self, xd, yd, zd, vxd, vyd, vzd, qd, md, forces,
            t, dt, num_steps):
        """Advance positions and velocities in place by ``num_steps`` steps.

        Each step is: half position advance, full velocity update from the
        summed accelerations of ``forces``, half position advance.

        Returns:
            The time after the last step (``t`` advanced by ``dt`` per step,
            in two halves).
        """
        half_dt = 0.5 * dt

        # Acceleration accumulators, reused for every step.
        axd = cl_array.zeros_like(xd)
        ayd = cl_array.zeros_like(xd)
        azd = cl_array.zeros_like(xd)

        for _ in range(num_steps):
            # Drift: first half of the position advance.
            xd += half_dt * vxd
            yd += half_dt * vyd
            zd += half_dt * vzd
            t += half_dt

            # Kick: reset the accumulators, let every force add its share,
            # then apply the full velocity update.
            axd.fill(0.0, self.queue)
            ayd.fill(0.0, self.queue)
            azd.fill(0.0, self.queue)
            for force in forces:
                force.computeAcc(xd, yd, zd, vxd, vyd, vzd, qd, md,
                        axd, ayd, azd, t)
            vxd += dt * axd
            vyd += dt * ayd
            vzd += dt * azd

            # Drift: second half of the position advance.
            xd += half_dt * vxd
            yd += half_dt * vyd
            zd += half_dt * vzd
            t += half_dt

        return t
Esempio n. 5
0
    def test_adj_inplace(self):
        """Compare the inner products of ``symgrad.fwd`` and ``symgrad.adj``.

        ``a`` sums the inner product over components 0..2 and twice the inner
        product over components 3..5 of the forward output against
        ``self.symdivin``; ``b`` is the inner product of ``self.symgradin``
        against the negated adjoint output.
        """
        dev_grad_in = clarray.to_device(self.queue, self.symgradin)
        dev_div_in = clarray.to_device(self.queue, self.symdivin)

        dev_fwd_out = clarray.zeros_like(dev_div_in)
        dev_adj_out = clarray.zeros_like(dev_grad_in)

        self.symgrad.fwd(dev_fwd_out, dev_grad_in)
        self.symgrad.adj(dev_adj_out, dev_div_in)

        fwd_host = dev_fwd_out.get()
        adj_host = dev_adj_out.get()

        norm = self.symgradin.size
        first = slice(None, 3)
        second = slice(3, 6)

        lhs_first = np.vdot(
            fwd_host[..., first].flatten(),
            self.symdivin[..., first].flatten()) / norm * 4
        lhs_second = 2 * np.vdot(
            fwd_host[..., second].flatten(),
            self.symdivin[..., second].flatten()) / norm * 4
        a = lhs_first + lhs_second
        b = np.vdot(self.symgradin.flatten(),
                    -adj_host.flatten()) / norm * 4

        mismatch = a - b
        print("Adjointness: %.2e +1j %.2e" % (mismatch.real, mismatch.imag))

        self.assertAlmostEqual(a, b, places=12)
Esempio n. 6
0
    def update(self, xd, yd, zd, vxd, vyd, vzd, qd, md, forces, t, dt,
               num_steps):
        """Integrate the particle arrays in place for ``num_steps`` steps.

        One step consists of a half position advance, a velocity update using
        the accelerations accumulated from every entry of ``forces``, and a
        second half position advance.

        Returns:
            The updated time ``t``.
        """
        half_step = 0.5 * dt

        # Scratch arrays that receive the accelerations of each step.
        acc_x = cl_array.zeros_like(xd)
        acc_y = cl_array.zeros_like(xd)
        acc_z = cl_array.zeros_like(xd)

        for _ in range(num_steps):
            # First half of the position advance.
            xd += half_step * vxd
            yd += half_step * vyd
            zd += half_step * vzd
            t += half_step

            # Zero the accumulators before the forces write into them.
            acc_x.fill(0.0, self.queue)
            acc_y.fill(0.0, self.queue)
            acc_z.fill(0.0, self.queue)
            for force in forces:
                force.computeAcc(xd, yd, zd, vxd, vyd, vzd, qd, md, acc_x,
                                 acc_y, acc_z, t)

            # Full velocity update.
            vxd += dt * acc_x
            vyd += dt * acc_y
            vzd += dt * acc_z

            # Second half of the position advance.
            xd += half_step * vxd
            yd += half_step * vyd
            zd += half_step * vzd
            t += half_step

        return t
Esempio n. 7
0
    def _gpu_init(self):
        """Initialize all the data for the GPU-accelerated search.

        Uploads the host grids from ``self.data``, allocates the device work
        buffers, builds the FFT kernels and stores everything in
        ``self.gpu_data``. The forward transforms of ``rcore`` and ``rsurf``
        are computed once at the end.
        """
        self.gpu_data = {}
        g = self.gpu_data
        d = self.data
        q = self.queue
        ctx = q.context

        # Move data to the GPU; host arrays go through float32array first.
        for name in ('rcore', 'rsurf'):
            g[name] = cl_array.to_device(q, float32array(d[name].array))
        # The scanning chain is stored as an Image (faster to rotate,
        # according to the original author) together with its sampler.
        g['im_lsurf'] = cl.image_from_array(ctx, float32array(d['lsurf'].array))
        g['sampler'] = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                                  cl.filter_mode.LINEAR)

        if self.distance_restraints:
            g['restraints'] = cl_array.to_device(q, float32array(d['restraints']))

        # Zero-initialized work arrays shaped like rcore.
        for name in ('lsurf', 'clashvol', 'intervol'):
            g[name] = cl_array.zeros_like(g['rcore'])
        # Zero-initialized int32 work arrays with the grid shape.
        g['interspace'] = cl_array.zeros(q, d['shape'], dtype=np.int32)
        for name in ('restspace', 'access_interspace', 'best_access_interspace'):
            g[name] = cl_array.zeros_like(g['interspace'])

        # Counting arrays: one row per work-group-sized chunk of rcore.
        workgroup_size = 32
        nsubhists = int(np.ceil(g['rcore'].size/workgroup_size))
        nrestraints = d['nrestraints']
        g['subhists'] = cl_array.zeros(q, (nsubhists, nrestraints + 1), dtype=np.float32)
        g['viol_counter'] = cl_array.zeros(q, (nsubhists, nrestraints, nrestraints), dtype=np.float32)

        # Complex arrays; the first axis is reduced to n//2 + 1 entries.
        ft_shape = list(d['shape'])
        ft_shape[0] = d['shape'][0]//2 + 1
        g['ft_shape'] = ft_shape
        g['ft_rcore'] = cl_array.zeros(q, ft_shape, dtype=np.complex64)
        for name in ('ft_rsurf', 'ft_lsurf', 'ft_clashvol', 'ft_intervol'):
            g[name] = cl_array.zeros_like(g['ft_rcore'])

        # Other miscellaneous data, copied over as-is.
        for key in ('nrot', 'max_clash', 'min_interaction'):
            g[key] = d[key]

        # Kernels, with the real-to-complex FFT and its inverse attached.
        kernels = Kernels(ctx)
        g['k'] = kernels
        kernels.rfftn = pyclfft.RFFTn(ctx, d['shape'])
        kernels.irfftn = pyclfft.iRFFTn(ctx, d['shape'])

        # Initial calculations: transform the two uploaded grids.
        kernels.rfftn(q, g['rcore'], g['ft_rcore'])
        kernels.rfftn(q, g['rsurf'], g['ft_rsurf'])
    def setup_device(self, imshape):
        """Allocate the device buffers and build the kernels for ``imshape``.

        ``imshape`` is remembered in ``self.cached_shape``.
        """
        print('Setting up with imshape = %s' % (str(imshape)))

        self.cached_shape = imshape

        # Float32 work arrays, all shaped like the image buffer.
        # clO and clM start zeroed; the others are left uninitialized.
        self.clIm = cla.Array(self.q, imshape, np.float32)
        for attr in ('clm', 'clx', 'cly'):
            setattr(self, attr, cla.empty_like(self.clIm))
        for attr in ('clO', 'clM'):
            setattr(self, attr, cla.zeros_like(self.clIm))
        for attr in ('clF', 'clS', 'clThisS', 'clScratch'):
            setattr(self, attr, cla.empty_like(self.clIm))

        self.radial_prg = pyopencl.Program(self.ctx, RADIAL_PROGRAM).build()

        self.sobel = Sobel(self.ctx, self.q)

        # LocalMemorySeparableCorrelation is used here; the original source
        # also mentions NaiveSeparableCorrelation as an alternative.
        self.sepcorr2d = LocalMemorySeparableCorrelation(self.ctx, self.q)

        # a += b
        self.accum = ElementwiseKernel(
            self.ctx,
            'float *a, float *b',
            'a[i] += b[i]',
        )

        # s = -s / nRadii
        self.norm_s = ElementwiseKernel(
            self.ctx,
            'float *s, const float nRadii',
            's[i] = -1 * s[i] / nRadii',
            'norm_s',
        )

        # a -= b / nr
        self.accum_s = ElementwiseKernel(
            self.ctx,
            'float *a, float *b, const float nr',
            'a[i] -= b[i] / nr',
        )

        self.gaussians = {}
        self.gaussian_prgs = {}

        self.minmax = MinMaxKernel(self.ctx, self.q)

        # Starburst storage: a read-only single-channel float image.
        image_format = cl.ImageFormat(cl.channel_order.R,
                                      cl.channel_type.FLOAT)
        self.clIm2D = cl.Image(self.ctx, mf.READ_ONLY, image_format, imshape)

        # Sampler for the image object: non-normalized coordinates,
        # clamp-to-edge addressing, linear filtering.
        self.imSampler = cl.Sampler(
            self.ctx,
            False,
            cl.addressing_mode.CLAMP_TO_EDGE,
            cl.filter_mode.LINEAR,
        )

        self.cl_find_ray_boundaries = FindRayBoundaries(self.ctx, self.q)
Esempio n. 9
0
def zeros_like(array, backend=None):
    """Return a wrapped, zero-filled array shaped like ``array.dev``.

    ``backend`` defaults to ``array.backend``. ``'opencl'`` and ``'cuda'``
    allocate with the matching GPU array module; any other value uses NumPy.
    """
    backend = array.backend if backend is None else backend

    if backend == 'opencl':
        import pyopencl.array as cl_arrays
        zeros = cl_arrays.zeros_like(array.dev)
    elif backend == 'cuda':
        import pycuda.gpuarray as cuda_arrays
        zeros = cuda_arrays.zeros_like(array.dev)
    else:
        zeros = np.zeros_like(array.dev)

    return wrap_array(zeros, backend)
Esempio n. 10
0
    def __init__(self,
                 sino_shape,
                 slice_shape=None,
                 axis_position=None,
                 angles=None,
                 ctx=None,
                 devicetype="all",
                 platformid=None,
                 deviceid=None,
                 profile=False):
        """Create the projector pair and the basic device arrays.

        The OpenCL-related arguments (``ctx``, ``devicetype``, ``platformid``,
        ``deviceid``, ``profile``) go to ``OpenclProcessing.__init__``; the
        geometry arguments go to ``Backprojection``, whose resulting slice
        shape, angles and bin count are then used to build ``Projection``.
        """
        OpenclProcessing.__init__(
            self, ctx=ctx, devicetype=devicetype, platformid=platformid,
            deviceid=deviceid, profile=profile)

        # Backprojector first: the projector is configured from its geometry.
        backprojector = Backprojection(
            sino_shape, slice_shape=slice_shape, axis_position=axis_position,
            angles=angles, ctx=self.ctx, profile=profile)
        self.backprojector = backprojector

        self.projector = Projection(
            backprojector.slice_shape, backprojector.angles,
            axis_position=axis_position,
            detector_width=backprojector.num_bins,
            normalize=False, ctx=self.ctx, profile=profile)

        self.sino_shape = sino_shape
        self.is_cpu = backprojector.is_cpu

        # Float32 device arrays: two of sinogram shape, two of slice shape.
        self.d_data = parray.zeros(self.queue, sino_shape, dtype=np.float32)
        self.d_sino = parray.zeros_like(self.d_data)
        self.d_x = parray.zeros(
            self.queue, backprojector.slice_shape, dtype=np.float32)
        self.d_x_old = parray.zeros_like(self.d_x)

        buffers = {
            "d_data": self.d_data,
            "d_sino": self.d_sino,
            "d_x": self.d_x,
            "d_x_old": self.d_x_old,
        }
        self.add_to_cl_mem(buffers)
Esempio n. 11
0
    def test_2d_out_of_place(self, ctx):
        """Out-of-place 2-D FFT on a 3-D array must match ``np.fft.fft2``.

        The transform runs over the last two axes of an ``(L, M, N)``
        complex64 array and writes into a separate output array.
        """
        queue = cl.CommandQueue(ctx)

        L = 4
        M = 64
        N = 32
        axes = (-1, -2)

        nd_data = np.arange(L*M*N, dtype=np.complex64)
        nd_data.shape = (L, M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        axes=axes,
                        )

        transform.enqueue()

        # Fetch the device result and the reference once. The diagnostic
        # output previously printed the bound ``get`` method, not the data.
        result = cl_data_transformed.get()
        expected = np.fft.fft2(nd_data, axes=axes)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
    def computeEnergy(self, x, y, z, q):
        """Return the sum of the per-element potential energies.

        The inputs are uploaded to the device and the single- or
        double-precision kernel is chosen from ``x.dtype``. For any other
        dtype a message is printed, no kernel runs, and the sum of the
        zero-initialized output array is returned.
        """
        xd = cl_array.to_device(self.queue, x)
        yd = cl_array.to_device(self.queue, y)
        zd = cl_array.to_device(self.queue, z)
        qd = cl_array.to_device(self.queue, q)
        coulombEnergy = cl_array.zeros_like(xd)

        # Select the program and the matching scalar type for its constants.
        precision = x.dtype
        if precision == numpy.float32:
            program, real = self.compEnergyF, numpy.float32
        elif precision == numpy.float64:
            program, real = self.compEnergyD, numpy.float64
        else:
            program, real = None, None
            print("Unknown float type.")

        if program is not None:
            program.calc_potential_energy(
                self.queue,
                (x.size, ), None,
                xd.data, yd.data, zd.data,
                qd.data, coulombEnergy.data, numpy.int32(len(x)),
                real(self.k), real(self.impactFact),
                g_times_l=False)

        return numpy.sum(coulombEnergy.get(self.queue))
Esempio n. 13
0
    def test_2d_in_4d_out_of_place(self, ctx):
        """Out-of-place 2-D FFT on a 4-D array must match ``np.fft.fft2``.

        The transform runs over the last two axes of an ``(L1, L2, M, N)``
        complex64 array and writes into a separate output array.
        """
        queue = cl.CommandQueue(ctx)

        L1 = 4
        L2 = 5

        M = 64
        N = 32
        # Original author's notes: axes (0, 1) also work, while (0, 2)
        # "cannot be collapsed".
        axes = (-1, -2)

        nd_data = np.arange(L1*L2*M*N, dtype=np.complex64)
        nd_data.shape = (L1, L2, M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        axes=axes,
                        )

        transform.enqueue()

        # Fetch the device result and the reference once. The diagnostic
        # output previously printed the bound ``get`` method, not the data.
        result = cl_data_transformed.get()
        expected = np.fft.fft2(nd_data, axes=axes)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
Esempio n. 14
0
 def _test_desparsification(self, input_on_device, output_on_device):
     """Densify the reference CSR data and compare with ``self.array``.

     ``input_on_device`` selects device or host CSR inputs;
     ``output_on_device`` selects a preallocated device output or ``None``.
     """
     current_config = "input on device: %s, output on device: %s" % (
         str(input_on_device), str(output_on_device)
     )
     # De-sparsify on device
     csr = CSR(self.array.shape, max_nnz=self.ref_nnz)

     data, indices, indptr = self.ref_data, self.ref_indices, self.ref_indptr
     if input_on_device:
         data = parray.to_device(csr.queue, data)
         indices = parray.to_device(csr.queue, indices)
         indptr = parray.to_device(csr.queue, indptr)

     output = parray.zeros_like(csr.array) if output_on_device else None

     dense = csr.densify(data, indices, indptr, output=output)
     if output_on_device:
         # Bring the device result back to the host before comparing.
         dense = dense.get()

     # Compare
     matches = np.allclose(dense.reshape(self.array.shape), self.array)
     self.assertTrue(
         matches,
         "something wrong with densified data (%s)"
         % current_config
     )
Esempio n. 15
0
    def test_2d_in_4d_out_of_place(self, ctx):
        """Out-of-place 2-D FFT on a 4-D array must match ``np.fft.fft2``.

        The transform runs over the last two axes of an ``(L1, L2, M, N)``
        complex64 array and writes into a separate output array.
        """
        queue = cl.CommandQueue(ctx)

        L1 = 4
        L2 = 5

        M = 64
        N = 32
        # Original author's notes: axes (0, 1) also work, while (0, 2)
        # "cannot be collapsed".
        axes = (-1, -2)

        nd_data = np.arange(L1 * L2 * M * N, dtype=np.complex64)
        nd_data.shape = (L1, L2, M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(
            ctx,
            queue,
            cl_data,
            cl_data_transformed,
            axes=axes,
        )

        transform.enqueue()

        # Fetch the device result and the reference once. The diagnostic
        # output previously printed the bound ``get`` method, not the data.
        result = cl_data_transformed.get()
        expected = np.fft.fft2(nd_data, axes=axes)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
Esempio n. 16
0
    def test_2d_out_of_place(self, ctx):
        """Out-of-place 2-D FFT on a 3-D array must match ``np.fft.fft2``.

        The transform runs over the last two axes of an ``(L, M, N)``
        complex64 array and writes into a separate output array.
        """
        queue = cl.CommandQueue(ctx)

        L = 4
        M = 64
        N = 32
        axes = (-1, -2)

        nd_data = np.arange(L * M * N, dtype=np.complex64)
        nd_data.shape = (L, M, N)
        cl_data = cla.to_device(queue, nd_data)

        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(
            ctx,
            queue,
            cl_data,
            cl_data_transformed,
            axes=axes,
        )

        transform.enqueue()

        # Fetch the device result and the reference once. The diagnostic
        # output previously printed the bound ``get`` method, not the data.
        result = cl_data_transformed.get()
        expected = np.fft.fft2(nd_data, axes=axes)

        print(result)
        print(expected)

        assert np.allclose(result, expected, rtol=1e-3, atol=1e-3)
Esempio n. 17
0
    def __init__(self,
                 sino_shape,
                 slice_shape=None,
                 axis_position=None,
                 angles=None,
                 ctx=None,
                 devicetype="all",
                 platformid=None,
                 deviceid=None,
                 profile=False):
        """Build the element-wise kernels and extra device arrays of the solver.

        All arguments are passed through to
        ``ReconstructionAlgorithm.__init__`` without modification.
        """
        ReconstructionAlgorithm.__init__(
            self, sino_shape,
            slice_shape=slice_shape, axis_position=axis_position,
            angles=angles, ctx=ctx, devicetype=devicetype,
            platformid=platformid, deviceid=deviceid, profile=profile)
        self.compute_preconditioners()

        # LinAlg instance operating on the slice shape.
        self.linalg = LinAlg(self.backprojector.slice_shape, ctx=self.ctx)

        # Positivity constraint.
        clamp_source = "a[i] = max(a[i], 0.0f);"
        self.elwise_clamp = ElementwiseKernel(self.ctx, "float *a",
                                              clamp_source)

        # Projection onto the L-infinity ball of radius Lambda, applied to
        # each component of the float2 entries.
        proj_source = (
            "a[i].x = copysign(min(fabs(a[i].x), Lambda), a[i].x); "
            "a[i].y = copysign(min(fabs(a[i].y), Lambda), a[i].y);"
        )
        self.elwise_proj_linf = ElementwiseKernel(
            self.ctx, "float2* a, float Lambda", proj_source,
            "elwise_proj_linf")

        # Additional arrays. The gradient is evaluated once before
        # cl_mem["d_gradient"] is read.
        # NOTE(review): presumably that call allocates the buffer - confirm.
        self.linalg.gradient(self.d_x)
        self.d_p = parray.zeros_like(self.linalg.cl_mem["d_gradient"])
        self.d_q = parray.zeros_like(self.d_data)
        self.d_g = self.linalg.d_image
        self.d_tmp = parray.zeros_like(self.d_x)

        new_buffers = {
            "d_p": self.d_p,
            "d_q": self.d_q,
            "d_tmp": self.d_tmp,
        }
        self.add_to_cl_mem(new_buffers)

        self.theta = 1.0
Esempio n. 18
0
    def zeros_like(t: Tensor, gpu=False) -> Tensor:
        """Return a zero-filled tensor with the shape of ``t``.

        With ``gpu`` set, the zeros are allocated from ``t._data`` with
        ``clarray.zeros_like`` and the result is flagged as a GPU tensor.
        Otherwise a NumPy array of zeros is created with dtype float32,
        whatever the dtype of ``t._data``.
        """
        if not gpu:
            host_zeros = np.zeros_like(t._data, dtype=np.float32)
            return Tensor(host_zeros)

        device_zeros = clarray.zeros_like(t._data)
        return Tensor(device_zeros, gpu=True)
Esempio n. 19
0
    def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
                 ctx=None, devicetype="all", platformid=None, deviceid=None,
                 profile=False
                 ):
        """Set up the projector, the backprojector and the base device arrays.

        ``ctx``, ``devicetype``, ``platformid``, ``deviceid`` and ``profile``
        configure ``OpenclProcessing``; ``sino_shape``, ``slice_shape``,
        ``axis_position`` and ``angles`` configure ``Backprojection``, from
        which ``Projection`` takes its slice shape, angles and detector width.
        """
        OpenclProcessing.__init__(
            self,
            ctx=ctx,
            devicetype=devicetype,
            platformid=platformid,
            deviceid=deviceid,
            profile=profile,
        )

        # Create the backprojector first; the projector reuses its geometry.
        bp = Backprojection(sino_shape, slice_shape=slice_shape,
                            axis_position=axis_position, angles=angles,
                            ctx=self.ctx, profile=profile)
        self.backprojector = bp

        # Create the matching (non-normalized) projector.
        self.projector = Projection(bp.slice_shape, bp.angles,
                                    axis_position=axis_position,
                                    detector_width=bp.num_bins,
                                    normalize=False,
                                    ctx=self.ctx, profile=profile)

        self.sino_shape = sino_shape
        self.is_cpu = bp.is_cpu

        # Zero-initialized float32 arrays in sinogram and slice space.
        self.d_data = parray.zeros(self.queue, sino_shape, dtype=np.float32)
        self.d_sino = parray.zeros_like(self.d_data)
        self.d_x = parray.zeros(self.queue, bp.slice_shape, dtype=np.float32)
        self.d_x_old = parray.zeros_like(self.d_x)

        registered = {
            "d_data": self.d_data,
            "d_sino": self.d_sino,
            "d_x": self.d_x,
            "d_x_old": self.d_x_old,
        }
        self.add_to_cl_mem(registered)
Esempio n. 20
0
    def test_1d_out_of_place(self, ctx):
        """Out-of-place 1-D FFT of 32 complex64 samples must match ``np.fft.fft``."""
        queue = cl.CommandQueue(ctx)

        host_input = np.arange(32, dtype=np.complex64)
        device_input = cla.to_device(queue, host_input)
        device_output = cla.zeros_like(device_input)

        # Keep a reference to the transform until the result has been read.
        transform = FFT(ctx, queue, device_input, device_output)
        transform.enqueue()

        result = device_output.get()
        expected = np.fft.fft(host_input)
        assert np.allclose(result, expected)
Esempio n. 21
0
    def test_adj_inplace(self):
        """Check that ``<op.fwd(x), y>`` equals ``<x, op.adj(y)>``.

        Both inner products are divided by ``self.opinadj.size`` and compared
        with ``np.testing.assert_allclose`` using ``RTOL`` and ``ATOL``.
        """
        dev_fwd_in = clarray.to_device(self.queue, self.opinfwd)
        dev_adj_in = clarray.to_device(self.queue, self.opinadj)

        dev_fwd_out = clarray.zeros_like(dev_adj_in)
        dev_adj_out = clarray.zeros_like(dev_fwd_in)

        self.op.fwd(dev_fwd_out, [dev_fwd_in, [], self.grad_buf])
        self.op.adj(dev_adj_out, [dev_adj_in, [], self.grad_buf])

        fwd_host = dev_fwd_out.get()
        adj_host = dev_adj_out.get()

        norm = self.opinadj.size
        a = np.vdot(fwd_host.flatten(), self.opinadj.flatten())/norm
        b = np.vdot(self.opinfwd.flatten(), adj_host.flatten())/norm

        mismatch = a - b
        print("Adjointness: %.2e +1j %.2e" % (mismatch.real, mismatch.imag))
        np.testing.assert_allclose(a, b, rtol=RTOL, atol=ATOL)
Esempio n. 22
0
def ones_like(array, backend='cython'):
    """Return an ``Array`` of ones shaped like ``array``.

    ``'opencl'`` builds the ones as ``1 + zeros_like(array)``, ``'cuda'``
    uses ``pycuda.gpuarray.ones_like``; both results are attached to a new
    ``Array`` with ``set_dev_array``. Any other backend wraps
    ``np.ones_like(array)`` directly.
    """
    if backend not in ('opencl', 'cuda'):
        return Array(np.ones_like(array))

    if backend == 'opencl':
        import pyopencl.array as cl_arrays
        device_ones = 1 + cl_arrays.zeros_like(array)
    else:
        import pycuda.gpuarray as cuda_arrays
        device_ones = cuda_arrays.ones_like(array)

    result = Array()
    result.set_dev_array(device_ones)
    return result
Esempio n. 23
0
 def init_indices_buffers(self, image_width, image_height, kernels):
   """Create the index buffers on host and device.

   Allocates an int32 index range of ``self.array_size`` entries on both
   sides, a zeroed device buffer of the same layout, and one zeroed int32
   host buffer (shaped like ``self.source_host_buffer``) per
   ``kernels[cell][centre]`` entry. ``image_width`` and ``image_height``
   are not used here.
   """
   mf = cl.mem_flags  # not used below; lookup kept from the original
   count = self.array_size
   int32 = numpy.int32

   self.indices_host_buffer = numpy.arange(count, dtype=int32)
   self.indices_gpu_buffer = cl_array.arange(self.queue, 0, count, dtype=int32)
   self.sorted_indices_gpu_buffer = cl_array.zeros_like(self.indices_gpu_buffer)

   back_buffers = {}
   self.indices_host_back_buffers = back_buffers
   for cell in kernels.keys():
     back_buffers[cell] = {
       centre: numpy.zeros_like(self.source_host_buffer, dtype=int32)
       for centre in kernels[cell].keys()
     }
Esempio n. 24
0
    def test_adj_inplace(self):
        """Check that ``<op.fwd(x), y>`` equals ``<x, op.adj(y)>``.

        Both inner products are divided by ``self.opinadj.size`` and must
        agree to 12 decimal places.
        """
        dev_fwd_in = clarray.to_device(self.queue, self.opinfwd)
        dev_adj_in = clarray.to_device(self.queue, self.opinadj)

        dev_fwd_out = clarray.zeros_like(dev_adj_in)
        dev_adj_out = clarray.zeros_like(dev_fwd_in)

        self.op.fwd(dev_fwd_out, [dev_fwd_in, [], self.grad_buf])
        self.op.adj(dev_adj_out, [dev_adj_in, [], self.grad_buf])

        fwd_host = dev_fwd_out.get()
        adj_host = dev_adj_out.get()

        norm = self.opinadj.size
        a = np.vdot(fwd_host.flatten(), self.opinadj.flatten()) / norm
        b = np.vdot(self.opinfwd.flatten(), adj_host.flatten()) / norm

        mismatch = a - b
        print("Adjointness: %.2e +1j %.2e" % (mismatch.real, mismatch.imag))

        self.assertAlmostEqual(a, b, places=12)
Esempio n. 25
0
 def _test_sparsification(self, input_on_device, output_on_device):
     """Sparsify ``self.array`` and compare with the reference CSR data.

     ``input_on_device`` selects a (flattened) device input or the host
     array; ``output_on_device`` selects preallocated device outputs or
     ``None``. Only the first ``self.ref_nnz`` entries of the data and
     indices are compared.
     """
     current_config = "input on device: %s, output on device: %s" % (
         str(input_on_device), str(output_on_device)
     )

     def expect_close(actual, reference, message):
         # Assert element-wise closeness, tagging failures with the config.
         self.assertTrue(np.allclose(actual, reference),
                         message % current_config)

     # Sparsify on device
     csr = CSR(self.array.shape)
     # A device input has to be flattened; the host array is passed as-is.
     arr = (
         parray.to_device(csr.queue, self.array.ravel())
         if input_on_device
         else self.array
     )
     output = None
     if output_on_device:
         output = (
             parray.zeros_like(csr.data),
             parray.zeros_like(csr.indices),
             parray.zeros_like(csr.indptr),
         )

     data, indices, indptr = csr.sparsify(arr, output=output)
     if output_on_device:
         data = data.get()
         indices = indices.get()
         indptr = indptr.get()

     # Compare
     nnz = self.ref_nnz
     expect_close(data[:nnz], self.ref_data,
                  "something wrong with sparsified data (%s)")
     expect_close(indices[:nnz], self.ref_indices,
                  "something wrong with sparsified indices (%s)")
     expect_close(indptr, self.ref_indptr,
                  "something wrong with sparsified indices pointers (indptr) (%s)")
Esempio n. 26
0
    def test_rotate_grid3d_linear(self):
        """Exercise the rotate_grid3d kernel with three rotation matrices.

        The kernel's last argument is ``np.int32(False)`` in every call.
        NOTE(review): the original docstring said "nearest interpolation"
        while the test name says "linear" - confirm what the flag selects.
        """
        kernel = self.k._program.rotate_grid3d
        padding = [0] * 7  # the 9 matrix entries are followed by 7 zeros
        gws = tuple([2 * self.values['llength'] + 1] * 3)

        def run(matrix):
            # Launch the kernel on cl_grid -> cl_out with the given matrix.
            args = (self.cl_grid.data, matrix, self.cl_out.data,
                    np.int32(False))
            kernel(self.queue, gws, None, *args)

        # Identity rotation: the output must equal the input.
        identity = np.asarray([1, 0, 0, 0, 1, 0, 0, 0, 1] + padding,
                              dtype=np.float32)
        grid = np.zeros((4, 5, 6), dtype=np.float32)
        for index in [(0, 0, 0), (0, 0, 1), (0, 1, 1),
                      (0, 0, 2), (0, 0, -1), (-1, 0, 0)]:
            grid[index] = 1
        self.cl_grid = cl_array.to_device(self.queue, grid)
        self.cl_out = cl_array.zeros_like(self.cl_grid)

        run(identity)
        self.assertTrue(np.allclose(self.cl_grid.get(), self.cl_out.get()))

        # 90' rotation around z-axis
        self.cl_out.fill(0)
        quarter_turn = np.asarray([0, -1, 0, 1, 0, 0, 0, 0, 1] + padding,
                                  dtype=np.float32)
        run(quarter_turn)

        answer = np.zeros(self.shape, dtype=np.float32)
        for index in [(0, 0, 0), (0, 1, 0), (0, 1, -1),
                      (0, 2, 0), (0, -1, 0), (-1, 0, 0)]:
            answer[index] = 1
        self.assertTrue(np.allclose(answer, self.cl_out.get()))

        # Non-integer rotation, compared against the rotate_grid3d reference.
        rotmat = np.asarray(
            [[0.30901699, -0.5, 0.80901699],
             [-0.80901699, 0.30901699, 0.5],
             [-0.5, -0.80901699, -0.30901699]],
            dtype=np.float64)
        cl_rotmat = np.asarray(rotmat.ravel().tolist() + padding,
                               dtype=np.float32)
        run(cl_rotmat)
        rotate_grid3d(self.grid, rotmat, 2, self.out, False)
        # NOTE(review): this comparison is computed but never asserted.
        test = np.allclose(self.cl_out.get(), self.out)
Esempio n. 27
0
def Wp_func(params, G, P, loc, eflag, out=None):
    """Evaluate the ``Wp`` field on the grid from the primitive array ``P``.

    A grid vector whose zeroth component is 0 and whose remaining components
    are ``P[G.slices.U3VEC]`` is built, lowered with ``G.lower_grid`` and
    contracted with itself through ``G.dot`` to obtain ``utsq``.  A loopy
    kernel, built on first use and cached in the module-level
    ``knl_Wp_func``, then sanitises ``utsq`` and fills ``Wp``.

    Args:
        params: Mapping providing ``'queue'``, ``'n_prim'``, ``'gam'`` and
            ``'gamma_max'``.
        G: Grid object supplying ``slices``, ``shapes``, ``NG``,
            ``lower_grid`` and ``dot``.
        P: Primitive-variable array.
        loc: Grid location forwarded to ``G.lower_grid``.
        eflag: Integer flag array; the kernel stores 2 wherever the corrected
            ``utsq`` is negative or larger than ``1e3 * gamma_max**2``.
        out: Optional array receiving ``Wp``.  When omitted, a zeroed array
            shaped like ``utsq`` is allocated.

    Returns:
        The array holding ``Wp`` (``out`` itself when it was supplied).
    """
    global knl_Wp_func

    slices = G.slices
    shapes = G.shapes
    queue = params['queue']

    # Vectors are handled on the full grid: component 0 is zero, the rest
    # come straight from the primitives.
    u_con = cl_array.empty(queue, shapes.grid_vector, dtype=np.float64)
    u_con[0] = 0
    u_con[1:] = P[slices.U3VEC]

    u_cov = G.lower_grid(u_con, loc)
    u_sq = G.dot(u_con, u_cov)

    if knl_Wp_func is None:
        # First call: assemble, specialise and tune the kernel.
        kernel_src = add_ghosts(
            replace_prim_names("""
        cond1 := ((utsq_in[i,j,k] < 0.) * (abs(utsq_in[i,j,k]) < 1.e-13))
        utsq1 := if(cond1, fabs(utsq_in[i,j,k]), utsq_in[i,j,k])

        # Catch utsq < 0 and record it
        cond2 := ((utsq1 < 0) + (utsq1 > 1.e3 * gamma_max ** 2))
        utsq := if(cond2, (P[RHO,i,j,k] + P[UU,i,j,k]), utsq1)
        eflag[i,j,k] = if(cond2, 2, eflag[i,j,k])

        gamma := sqrt(1. + fabs(utsq))
        Wp[i,j,k] = (P[RHO,i,j,k] + P[UU,i,j,k] + (gam - 1.) * P[UU,i,j,k]) * gamma ** 2 - P[RHO,i,j,k] * gamma
        """))
        knl_Wp_func = lp.make_kernel(
            shapes.isl_grid_scalar,
            kernel_src,
            [
                *primsArrayArgs("P"),
                *scalarArrayArgs("utsq_in", "Wp"),
                *scalarArrayArgs("eflag", dtype=np.int32),
                ...,
            ],
            assumptions=shapes.assume_grid,
            default_offset=lp.auto)
        knl_Wp_func = lp.fix_parameters(knl_Wp_func,
                                        nprim=params['n_prim'],
                                        gam=params['gam'],
                                        gamma_max=params['gamma_max'])
        knl_Wp_func = tune_grid_kernel(knl_Wp_func,
                                       shapes.bulk_scalar,
                                       ng=G.NG)
        print("Compiled Wp_func")

    result = cl_array.zeros_like(u_sq) if out is None else out

    _evt, _ = knl_Wp_func(queue, P=P, utsq_in=u_sq, Wp=result, eflag=eflag)

    return result
    def test_adj_inplace(self):
        """Check that ``self.op.fwd`` and ``self.op.adj`` are mutually adjoint.

        Both operators write into pre-allocated device outputs.  The scaled
        inner products ``<fwd(x), y>`` and ``<x, adj(y)>`` are then compared
        within ``RTOL`` / ``ATOL``.
        """
        dev_in_fwd = clarray.to_device(self.queue, self.opinfwd)
        dev_in_adj = clarray.to_device(self.queue, self.opinadj)

        # The forward output lives in the adjoint's input space and vice versa.
        dev_out_fwd = clarray.zeros_like(dev_in_adj)
        dev_out_adj = clarray.zeros_like(dev_in_fwd)

        extra_buffers = (self.coil_buf, self.grad_buf)
        dev_out_fwd.add_event(
            self.op.fwd(dev_out_fwd, [dev_in_fwd, *extra_buffers]))
        dev_out_adj.add_event(
            self.op.adj(dev_out_adj, [dev_in_adj, *extra_buffers]))

        host_fwd = dev_out_fwd.map_to_host(wait_for=dev_out_fwd.events)
        host_adj = dev_out_adj.map_to_host(wait_for=dev_out_adj.events)

        # Both products share the same normalisation.
        scale = self.opinadj.size
        a = np.vdot(host_fwd.flatten(), self.opinadj.flatten()) / scale
        b = np.vdot(self.opinfwd.flatten(), host_adj.flatten()) / scale

        mismatch = a - b
        print("Adjointness: %.2e +1j %.2e" % (mismatch.real, mismatch.imag))
        np.testing.assert_allclose(a, b, rtol=RTOL, atol=ATOL)
Esempio n. 29
0
    def setup_device(self, imshape):
        """Allocate device buffers and build kernels for images of ``imshape``.

        Creates the float32 image buffer ``self.clIm`` plus a set of work
        buffers with the same layout, compiles ``PROGRAM``, and instantiates
        the Sobel, separable-correlation, element-wise and min/max helpers.

        Args:
            imshape: Shape of the images that will be processed.
        """
        print('Setting up with imshape = %s' % str(imshape))

        self.imshape = imshape

        # Every work buffer mirrors the layout of the image buffer.
        image_buf = cla.Array(self.q, imshape, numpy.float32)
        self.clIm = image_buf
        for attr in ('clm', 'clx', 'cly'):
            setattr(self, attr, cla.empty_like(image_buf))
        # These two start out zeroed; the others are left uninitialised.
        for attr in ('clO', 'clM'):
            setattr(self, attr, cla.zeros_like(image_buf))
        for attr in ('clF', 'clS', 'clThisS', 'clScratch'):
            setattr(self, attr, cla.empty_like(image_buf))

        self.radial_prg = pyopencl.Program(self.ctx, PROGRAM).build()

        self.sobel = Sobel(self.ctx, self.q)

        # Alternative kept from the original source:
        # NaiveSeparableCorrelation(self.ctx, self.q)
        self.sepcorr2d = LocalMemorySeparableCorrelation(self.ctx, self.q)

        # a += b
        self.accum = ElementwiseKernel(
            self.ctx,
            'float *a, float *b',
            'a[i] += b[i]',
        )

        # s = -s / nRadii
        self.norm_s = ElementwiseKernel(
            self.ctx,
            'float *s, const float nRadii',
            's[i] = -1 * s[i] / nRadii',
            'norm_s',
        )

        # a -= b / nr
        self.accum_s = ElementwiseKernel(
            self.ctx,
            'float *a, float *b, const float nr',
            'a[i] -= b[i] / nr',
        )

        # Empty caches, filled elsewhere.
        self.gaussians = {}
        self.gaussian_prgs = {}

        self.minmax = MinMaxKernel(self.ctx, self.q)
Esempio n. 30
0
    def _init_cl_arrays(self):
        """Upload ``self.G`` and allocate the complex work/field buffers.

        Creates on ``self.queue``:

        * ``cl_G`` / ``cl_G_conj``: ``self.G`` cast to ``self.complexdtype``
          and its complex conjugate,
        * ``cl_work`` / ``cl_workF``: zeroed arrays of shape ``N12_pad``,
        * ``cl_field1`` / ``cl_field2``: uninitialised arrays of shape
          ``N1`` and ``N2``.
        """
        queue = self.queue
        ctype = self.complexdtype

        # Cast once and reuse the result for both uploads instead of
        # converting the whole array twice.
        g_host = self.G.astype(ctype)
        self.cl_G = cla.to_device(queue, g_host)
        self.cl_G_conj = cla.to_device(queue, g_host.conj())

        self.cl_work = cla.zeros(queue, tuple(self.N12_pad), ctype)
        self.cl_workF = cla.zeros_like(self.cl_work)

        self.cl_field1 = cla.empty(queue, tuple(self.N1), ctype)
        self.cl_field2 = cla.empty(queue, tuple(self.N2), ctype)
Esempio n. 31
0
    def test_1d_out_of_place(self, ctx):
        """Out-of-place 1-D FFT of ``arange(32)`` must match ``np.fft.fft``."""
        queue = cl.CommandQueue(ctx)

        host_input = np.arange(32, dtype=np.complex64)
        dev_input = cla.to_device(queue, host_input)
        dev_output = cla.zeros_like(dev_input)

        # Keep the transform object referenced until the result has been read.
        fft_plan = FFT(ctx, queue, dev_input, dev_output)
        fft_plan.enqueue()

        computed = dev_output.get()
        assert np.allclose(computed, np.fft.fft(host_input))
Esempio n. 32
0
 def raise_grid(self, vcov, loc=Loci.CENT, ocl=True, out=None):
     """Raise a grid of covariant rank-1 tensors to contravariant ones.

     Args:
         vcov: Covariant vector grid (NumPy or device array).
         loc: Grid location whose inverse metric is used.
         ocl: Allow the OpenCL path; it is taken only when ``self.use_ocl``
             is also true.
         out: Optional output array for the OpenCL path; ignored by the
             NumPy path.

     Returns:
         The contravariant vector grid.
     """
     if not (self.use_ocl and ocl):
         # Host fallback: contract the inverse metric with the vector.
         return np.einsum("ij...,j...->i...",
                          self.gcon[loc.value, :, :, :, :, None], vcov)

     if out is None:
         # Allocate an output of the same flavour as the input.
         make_zeros = (np.zeros_like if isinstance(vcov, np.ndarray)
                       else cl_array.zeros_like)
         out = make_zeros(vcov)

     _evt, _ = self.dot2geom(self.queue,
                             g=self.gcon_d[loc.value],
                             v=vcov,
                             out=out)
     return out
Esempio n. 33
0
def gamma_func(params, G, Bsq, D, QdB, Qtsq, Wp, eflag, out=None):
    """Compute ``gamma = sqrt(1 + |utsq|)`` on the grid.

    ``utsq`` is derived inside the kernel from ``D``, ``Wp``, ``Bsq``,
    ``QdB`` and ``Qtsq``.  The loopy kernel is built on first use and cached
    in the module-level ``knl_gamma_func``.

    Args:
        params: Mapping providing ``'queue'`` and ``'gamma_max'``.
        G: Grid object supplying ``shapes`` and ``NG``.
        Bsq, D, QdB, Qtsq, Wp: Scalar grid arrays read by the kernel.
        eflag: Integer flag array; the kernel stores 2 wherever ``utsq`` is
            negative or larger than ``1e3 * gamma_max**2``.
        out: Optional array receiving ``gamma``.  When omitted, a zeroed
            array shaped like ``Bsq`` is allocated.

    Returns:
        The array holding ``gamma`` (``out`` itself when it was supplied).
    """
    global knl_gamma_func

    shapes = G.shapes

    if knl_gamma_func is None:
        # First call: assemble, specialise and tune the kernel.
        kernel_src = add_ghosts("""
        W := D[i,j,k] + Wp[i,j,k]
        WB := W + Bsq[i,j,k]
        # This is basically inversion of eq. A7 of MM
        <> utsq = -((W + WB) * QdB[i,j,k]**2 + W**2 * Qtsq[i,j,k]) / \
                    (QdB[i,j,k]**2 * (W + WB) + W**2 * (Qtsq[i,j,k] - WB**2))

        # Catch utsq < 0 and record it
        cond := ((utsq < 0) + (utsq > 1.e3 * gamma_max ** 2))
        eflag[i,j,k] = if(cond, 2, eflag[i,j,k])

        gamma[i,j,k] = sqrt(1. + fabs(utsq))
        """)
        knl_gamma_func = lp.make_kernel(
            shapes.isl_grid_scalar,
            kernel_src,
            [
                *scalarArrayArgs("Bsq", "D", "QdB", "Qtsq", "Wp", "gamma"),
                *scalarArrayArgs("eflag", dtype=np.int32),
                ...,
            ],
            assumptions=shapes.assume_grid,
            default_offset=lp.auto)
        knl_gamma_func = lp.fix_parameters(knl_gamma_func,
                                           gamma_max=params['gamma_max'])
        knl_gamma_func = tune_grid_kernel(knl_gamma_func,
                                          shapes.bulk_scalar,
                                          ng=G.NG)
        print("Compiled gamma_func")

    result = cl_array.zeros_like(Bsq) if out is None else out

    _evt, _ = knl_gamma_func(params['queue'],
                             Bsq=Bsq,
                             D=D,
                             QdB=QdB,
                             Qtsq=Qtsq,
                             Wp=Wp,
                             gamma=result,
                             eflag=eflag)

    return result
Esempio n. 34
0
    def test_create_plan(self):
        """Create a clFFT plan for a 2x4 complex array and print its layout."""
        G = gpyfftlib.GpyFFT()

        ctx = get_contexts()[0]
        queue = cl.CommandQueue(ctx)
        host_data = np.array([[1, 2, 3, 4], [5, 6, 7, 8]],
                             dtype=np.complex64)
        dev_data = cla.to_device(queue, host_data)
        # Allocated as in the original test, although never read afterwards.
        cl_data_transformed = cla.zeros_like(dev_data)

        plan = G.create_plan(ctx, dev_data.shape)

        layout_fields = (
            ('plan.strides_in', 'strides_in'),
            ('plan.strides_out', 'strides_out'),
            ('plan.distances', 'distances'),
            ('plan.batch_size', 'batch_size'),
        )
        for label, field in layout_fields:
            print(label, getattr(plan, field))

        # Drop the plan before the library handle.
        del plan
        del G
Esempio n. 35
0
def err_eqn(params, G, Bsq, D, Ep, QdB, Qtsq, Wp, eflag, out=None):
    """Evaluate the residual ``err`` on the grid for a trial ``Wp``.

    ``gamma`` is first obtained from :func:`gamma_func` (which may update
    ``eflag``).  A loopy kernel, built on first use and cached in the
    module-level ``knl_err_eqn``, then evaluates the residual.

    Args:
        params: Mapping providing ``'queue'``, ``'n_prim'``, ``'gam'`` and
            ``'gamma_max'``.
        G: Grid object supplying ``shapes`` and ``NG``.
        Bsq, D, Ep, QdB, Qtsq, Wp: Scalar grid arrays read by the kernel.
        eflag: Integer flag array forwarded to ``gamma_func`` and to the
            kernel call.
        out: Optional array receiving ``err``.  When omitted, a zeroed array
            shaped like ``Bsq`` is allocated.

    Returns:
        The array holding ``err`` (``out`` itself when it was supplied).
    """
    global knl_err_eqn

    shapes = G.shapes

    gamma_arr = gamma_func(params, G, Bsq, D, QdB, Qtsq, Wp, eflag)

    if knl_err_eqn is None:
        # First call: assemble, specialise and tune the kernel.
        kernel_src = add_ghosts("""
        W := Wp[i,j,k] + D[i,j,k]
        w := W / (gamma[i,j,k]**2)
        rho0 := D[i,j,k] / gamma[i,j,k]
        pres := (w - rho0) * (gam - 1.) / gam

        err[i,j,k] = -Ep[i,j,k] + Wp[i,j,k] - pres + 0.5*Bsq[i,j,k] + \
                        0.5*(Bsq[i,j,k] * Qtsq[i,j,k] - QdB[i,j,k]**2)/((Bsq[i,j,k] + W)**2)
        """)
        knl_err_eqn = lp.make_kernel(
            shapes.isl_grid_scalar,
            kernel_src,
            [
                *scalarArrayArgs("gamma", "Bsq", "D", "Ep", "QdB", "Qtsq",
                                 "Wp", "err"),
                *scalarArrayArgs("eflag", dtype=np.int32),
                ...,
            ],
            assumptions=shapes.assume_grid,
            default_offset=lp.auto)
        knl_err_eqn = lp.fix_parameters(knl_err_eqn,
                                        nprim=params['n_prim'],
                                        gam=params['gam'],
                                        gamma_max=params['gamma_max'])
        knl_err_eqn = tune_grid_kernel(knl_err_eqn,
                                       shapes.bulk_scalar,
                                       ng=G.NG)

    result = cl_array.zeros_like(Bsq) if out is None else out

    _evt, _ = knl_err_eqn(params['queue'],
                          Bsq=Bsq,
                          D=D,
                          Ep=Ep,
                          QdB=QdB,
                          Qtsq=Qtsq,
                          Wp=Wp,
                          gamma=gamma_arr,
                          err=result,
                          eflag=eflag)

    return result
Esempio n. 36
0
    def test_create_plan(self):
        """Create a clFFT plan for a 2x4 complex array and print its layout."""
        G = gpyfftlib.GpyFFT()

        ctx = get_contexts()[0]
        queue = cl.CommandQueue(ctx)
        rows = [[1, 2, 3, 4], [5, 6, 7, 8]]
        host_data = np.array(rows, dtype=np.complex64)
        dev_data = cla.to_device(queue, host_data)
        # Allocated as in the original test, although never read afterwards.
        cl_data_transformed = cla.zeros_like(dev_data)

        plan = G.create_plan(ctx, dev_data.shape)

        layout_fields = (
            ('plan.strides_in', 'strides_in'),
            ('plan.strides_out', 'strides_out'),
            ('plan.distances', 'distances'),
            ('plan.batch_size', 'batch_size'),
        )
        for label, field in layout_fields:
            print(label, getattr(plan, field))
    def computeAcc(self, xd, yd, zd, vxd, vyd, vzd, qd, md, axd, ayd, azd, t,
                   dt):
        """Launch the photon-scattering and recoil-kick kernels for one step.

        The mean number of scattered photons per particle is computed with
        the homogeneous-beam kernel when ``self.sigma`` is ``None`` and with
        the Gaussian-beam kernel otherwise.  Photon counts are then drawn
        through ``self.findSample``, normally distributed recoil directions
        are generated, and ``computeKicks`` is launched with the acceleration
        buffers.

        Args:
            xd, yd, zd: Device arrays passed to the scattering kernel
                (presumably particle positions).
            vxd, vyd, vzd: Device arrays passed to the scattering kernel
                (presumably particle velocities).
            qd: Unused by this method.
            md: Device array passed to ``computeKicks`` (presumably masses).
            axd, ayd, azd: Device arrays passed to ``computeKicks``
                (presumably accelerations updated in place; confirm against
                the kernel source).
            t: Unused by this method.
            dt: Time step; also determines the per-particle sample cap.
        """
        global_size = (xd.size, )

        # Compute average numbers of scattered photons
        nbars = cl_array.zeros_like(xd)
        state = (xd.data, yd.data, zd.data, vxd.data, vyd.data, vzd.data)
        wave_vector = tuple(numpy.float32(self.k0[i]) for i in range(3))
        common_tail = (numpy.float32(self.gamma), numpy.float32(self.delta0),
                       numpy.float32(self.S), numpy.float32(dt),
                       numpy.int32(xd.size), nbars.data)
        # Identity test: ``== None`` misbehaves for array-like values.
        if self.sigma is None:
            self.program.compute_mean_scattered_photons_homogeneous_beam(
                self.queue, global_size, None,
                *(state + wave_vector + common_tail))
        else:
            # The Gaussian kernel additionally takes the beam centre and width.
            beam = tuple(numpy.float32(self.x0[i]) for i in range(3))
            beam += (numpy.float32(self.sigma), )
            self.program.compute_mean_scattered_photons_gaussian_beam(
                self.queue, global_size, None,
                *(state + wave_vector + beam + common_tail))

        # Compute scattered photons and associated recoil kicks
        nMax = int(
            math.ceil(10.0 * self.S * (self.gamma / 2.0 / numpy.pi) * dt))
        actualNs = self.findSample(nbars, nMax)
        recoilDirectionsD = cl_array.Array(self.queue, [nbars.size, nMax, 3],
                                           dtype=numpy.float32)
        self.generator.fill_normal(recoilDirectionsD)

        # apply recoil kicks to particles
        recoilMomentum = numpy.linalg.norm(
            self.k0) * self._PlanckConstantReduced
        kick_args = (md.data, actualNs.data, numpy.int32(nMax),
                     recoilDirectionsD.data) + wave_vector + (
                         numpy.float32(recoilMomentum), numpy.float32(dt),
                         axd.data, ayd.data, azd.data,
                         numpy.int32(xd.shape[0]))
        self.program.computeKicks(self.queue, global_size, None, *kick_args)
Esempio n. 38
0
 def template_test(self, test_name):
     """Run one convolution test case and compare against its reference.

     Input and output are placed on the device or left on the host according
     to ``self.param["input_on_device"]`` and
     ``self.param["output_on_device"]``.  The error metric must stay below
     the tolerance registered for the kernel's dimensionality.

     Args:
         test_name: Key identifying the data/kernel pair and the reference
             function.
     """
     data, kernel = self.get_data_and_kernel(test_name)
     conv = self.instantiate_convol(data.shape, kernel)

     data_ref = (parray.to_device(conv.queue, data)
                 if self.param["input_on_device"] else data)
     # Pre-allocate the result only when it should stay on the device.
     d_res = (parray.zeros_like(conv.data_out)
              if self.param["output_on_device"] else None)

     res = conv(data_ref, output=d_res)
     if self.param["output_on_device"]:
         res = res.get()

     reference = self.get_reference_function(test_name)(data, kernel)
     metric = self.compare(res, reference)
     logger.info("%s: max error = %.2e" % (test_name, metric))

     tol_key = str("%dD" % kernel.ndim)
     self.assertLess(metric, self.tol[tol_key], self.print_err(conv))
Esempio n. 39
0
    def test_rotate_grid3d_nearest(self):
        """Test rotate_grid3d kernel using nearest interpolation."""
        kernel = self.k._program.rotate_grid3d

        def as_kernel_matrix(entries):
            # The nine matrix entries are followed by seven zeros of padding.
            return np.asarray(entries + [0] * 7, dtype=np.float32)

        def launch(matrix):
            # The last kernel argument is the flag set to True in this test.
            launch_args = (self.cl_grid.data, matrix, self.cl_out.data,
                           np.int32(True))
            gws = tuple([2 * self.values['llength'] + 1] * 3)
            kernel(self.queue, gws, None, *launch_args)

        grid = np.zeros((4, 5, 6), dtype=np.float32)
        for voxel in ((0, 0, 0), (0, 0, 1), (0, 1, 1), (0, 0, 2),
                      (0, 0, -1), (-1, 0, 0)):
            grid[voxel] = 1
        self.cl_grid = cl_array.to_device(self.queue, grid)
        self.cl_out = cl_array.zeros_like(self.cl_grid)

        # Identity rotation: the output must equal the input.
        launch(as_kernel_matrix([1, 0, 0, 0, 1, 0, 0, 0, 1]))
        self.assertTrue(np.allclose(self.cl_grid.get(), self.cl_out.get()))

        # 90' rotation around z-axis
        self.cl_out.fill(0)
        launch(as_kernel_matrix([0, -1, 0, 1, 0, 0, 0, 0, 1]))

        answer = np.zeros(self.shape, dtype=np.float32)
        for voxel in ((0, 0, 0), (0, 1, 0), (0, 1, -1), (0, 2, 0),
                      (0, -1, 0), (-1, 0, 0)):
            answer[voxel] = 1
        self.assertTrue(np.allclose(answer, self.cl_out.get()))
    def computeEnergy(self, x, y, z, q):
        """Return the summed output of the ``calc_potential_energy`` kernel.

        The four host arrays are uploaded, the single- or double-precision
        program is selected from ``x.dtype``, and the per-element results are
        summed on the host.  For any other dtype a message is printed and the
        sum of the untouched zero buffer is returned.

        Args:
            x, y, z: Host arrays uploaded to the kernel (presumably particle
                coordinates).
            q: Host array uploaded to the kernel (presumably charges).

        Returns:
            ``numpy.sum`` of the kernel's output buffer.
        """
        xd, yd, zd, qd = (cl_array.to_device(self.queue, host)
                          for host in (x, y, z, q))
        coulombEnergy = cl_array.zeros_like(xd)

        # Choose the program and scalar type matching the input precision.
        prec = x.dtype
        if prec == numpy.float32:
            program, real = self.compEnergyF, numpy.float32
        elif prec == numpy.float64:
            program, real = self.compEnergyD, numpy.float64
        else:
            program, real = None, None

        if real is None:
            print("Unknown float type.")
        else:
            program.calc_potential_energy(self.queue, (x.size, ),
                                          None,
                                          xd.data,
                                          yd.data,
                                          zd.data,
                                          qd.data,
                                          coulombEnergy.data,
                                          numpy.int32(len(x)),
                                          real(self.k),
                                          real(self.impactFact),
                                          g_times_l=False)

        return numpy.sum(coulombEnergy.get(self.queue))
Esempio n. 41
0
    def _rev_grad(self, valuation, adjoint, gradient, cache):
        """Back-propagate ``adjoint`` through this operation to its operands.

        Computes the gradients with respect to the input (``gx``), the
        weights (``gW``) and, when a bias is cached, the bias (``gb``), then
        forwards each one to the ``_rev_grad`` of the corresponding operand.

        Args:
            valuation: Passed through unchanged to the operands' ``_rev_grad``.
            adjoint: Gradient with respect to this node's output; its shape
                is unpacked as four values.
            gradient: Passed through unchanged to the operands' ``_rev_grad``.
            cache: Mapping from ``id(op)`` to the cached value of each operand.
        """
        # NOTE(review): pl.qs[0] is presumably the default command queue; confirm.
        q = pl.qs[0]
        # Cached values of the three operands: input, weights, bias.
        X = cache[id(self.ops[0])]
        W = cache[id(self.ops[1])]
        b = cache[id(self.ops[2])]
        gy = adjoint
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = X.shape
        kh, kw = W.shape[2:]

        # Weight gradient, accumulated over the n samples.
        # NOTE(review): this relies on gW.reshape() returning a view that
        # shares memory with gW, so the in-place += below lands in gW; confirm.
        gW = clarray.zeros_like(W)
        gW_mat = gW.reshape(out_c, c * kh * kw)
        col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
        gy_mats = gy.reshape(n, out_c, out_h * out_w)

        for i in xrange(n):
            gwmat = linalg.dot(q, gy_mats[i], col_mats[i], transB=True)
            gW_mat += gwmat

        # Gradient with respect to the column buffer, one sample at a time.
        W_mat = W.reshape(out_c, -1)
        gcol = clarray.empty_like(self.col)
        gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)
        for i in xrange(n):
            gcol_mats[i] = linalg.dot(q, W_mat, gy_mats[i], transA=True)

        # Fold the column gradient back into an (h, w) input gradient.
        gx, ev = conv.col2im(q, gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        ev.wait()
        # The bias gradient is only computed when a bias value was cached.
        gb = None
        if b is not None:
            gb, ev = conv.bgrads_sum(q, gy)
            ev.wait()
        # TODO bias... sum along multiple axes of gy?
        # TODO set gW, gx and gb in gradient dict
        self.ops[0]._rev_grad(valuation, gx, gradient, cache)
        self.ops[1]._rev_grad(valuation, gW, gradient, cache)
        if gb is not None:
            self.ops[2]._rev_grad(valuation, gb, gradient, cache)
Esempio n. 42
0
    def _gpu_init(self):
        """Populate ``self.gpu_data`` with device buffers, constants and kernels.

        Everything is created on ``self.queue`` (or its context) from the
        entries of ``self.data``.  Real and complex work arrays are allocated,
        the SAXS inputs are uploaded, the kernel collections are built, and
        the forward FFTs of ``rcore`` and ``rsurf`` are computed.
        """
        gpu = self.gpu_data = {}
        host = self.data
        queue = self.queue
        ctx = queue.context

        def pad_to_four(xyz):
            # Copy the three coordinate columns into a zeroed (N, 4) float32 array.
            padded = np.zeros((xyz.shape[0], 4), dtype=np.float32)
            padded[:, :3] = xyz[:]
            return padded

        for name in ('rcore', 'rsurf'):
            gpu[name] = cl_array.to_device(queue,
                                           float32array(host[name].array))
        gpu['im_lsurf'] = cl.image_from_array(
            ctx, float32array(host['lsurf'].array))
        gpu['sampler'] = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                                    cl.filter_mode.LINEAR)

        for name in ('lsurf', 'clashvol', 'intervol'):
            gpu[name] = cl_array.zeros_like(gpu['rcore'])
        gpu['interspace'] = cl_array.zeros(queue, host['shape'],
                                           dtype=np.int32)

        # complex arrays: the first axis is shortened to shape[0] // 2 + 1
        ft_shape = list(host['shape'])
        gpu['ft_shape'] = ft_shape
        ft_shape[0] = host['shape'][0] // 2 + 1
        gpu['ft_rcore'] = cl_array.zeros(queue, ft_shape, dtype=np.complex64)
        for name in ('ft_rsurf', 'ft_lsurf', 'ft_clashvol', 'ft_intervol'):
            gpu[name] = cl_array.zeros_like(gpu['ft_rcore'])

        # allocate SAXS arrays
        for name in ('q', 'targetIq', 'sq', 'base_Iq', 'fifj'):
            gpu[name] = cl_array.to_device(queue, float32array(host[name]))
        for name in ('rind', 'lind'):
            gpu[name] = cl_array.to_device(queue,
                                           host[name].astype(np.int32))
        rxyz4 = pad_to_four(host['rxyz'])
        lxyz4 = pad_to_four(host['lxyz'])
        gpu['rxyz'] = cl_array.to_device(queue, rxyz4)
        gpu['lxyz'] = cl_array.to_device(queue, lxyz4)
        gpu['rot_lxyz'] = cl_array.zeros_like(gpu['lxyz'])
        for name in ('chi2', 'best_chi2'):
            gpu[name] = cl_array.to_device(queue,
                                           host[name].astype(np.float32))
        gpu['rot_ind'] = cl_array.zeros(queue, host['shape'], dtype=np.int32)

        origin = np.zeros(4, dtype=np.float32)
        gpu['origin'] = origin
        origin[:3] = host['origin'].astype(np.float32)
        gpu['voxelspacing'] = np.float32(self.voxelspacing)

        # kernels
        kernels = Kernels(ctx)
        gpu['k'] = kernels
        gpu['saxs_k'] = saxs_Kernels(ctx)
        kernels.rfftn = pyclfft.RFFTn(ctx, host['shape'])
        kernels.irfftn = pyclfft.iRFFTn(ctx, host['shape'])

        # Forward transforms of the two uploaded grids.
        kernels.rfftn(queue, gpu['rcore'], gpu['ft_rcore'])
        kernels.rfftn(queue, gpu['rsurf'], gpu['ft_rsurf'])

        # Plain host values copied across unchanged.
        for name in ('nrot', 'max_clash', 'min_interaction'):
            gpu[name] = host[name]
Esempio n. 43
0
 def zeros_like(cls, arr):
     """Return a zero-filled device array with the shape and dtype of ``arr``.

     The array is allocated on the module-level ``queue``.

     NOTE(review): this assumes ``cl_array`` is ``pyopencl.array``, whose
     ``zeros_like`` accepts only the template array, so the previous call
     ``zeros_like(queue, arr)`` raised ``TypeError``.  Confirm the import.

     Args:
         arr: Template providing ``shape`` and ``dtype``.

     Returns:
         A new zero-initialised device array.
     """
     return cl_array.zeros(queue, arr.shape, arr.dtype)
Esempio n. 44
0
def zeros_like(a, dtype=None, order='K', subok=True):
    """Return a zeroed ``myclArray`` laid out like ``a``.

    ``dtype``, ``order`` and ``subok`` are accepted but not used by the body.

    Args:
        a: Template array handed to ``clarray.zeros_like``.
        dtype: Ignored.
        order: Ignored.
        subok: Ignored.

    Returns:
        The new array, re-typed to ``myclArray`` and re-initialised.
    """
    zeroed = clarray.zeros_like(a)
    # Switch the instance to the wrapper class in place, then let it set up
    # its own state.
    zeroed.__class__ = myclArray
    zeroed.reinit()
    return zeroed
    def computeAcc(self, xd, yd, zd, vxd, vyd, vzd, qd, md, 
            axd, ayd, azd, t, dt):
        """Launch the photon-scattering and recoil-kick kernels for one step.

        The mean number of scattered photons per particle is computed with
        the homogeneous-beam kernel when ``self.sigma`` is ``None`` and with
        the Gaussian-beam kernel otherwise.  Photon counts are then drawn
        through ``self.findSample``, normally distributed recoil directions
        are generated, and ``computeKicks`` is launched with the acceleration
        buffers.

        Args:
            xd, yd, zd: Device arrays passed to the scattering kernel
                (presumably particle positions).
            vxd, vyd, vzd: Device arrays passed to the scattering kernel
                (presumably particle velocities).
            qd: Unused by this method.
            md: Device array passed to ``computeKicks`` (presumably masses).
            axd, ayd, azd: Device arrays passed to ``computeKicks``
                (presumably accelerations updated in place; confirm against
                the kernel source).
            t: Unused by this method.
            dt: Time step; also determines the per-particle sample cap.
        """
        global_size = (xd.size, )

        # Compute average numbers of scattered photons
        nbars = cl_array.zeros_like(xd)
        state = (xd.data, yd.data, zd.data, vxd.data, vyd.data, vzd.data)
        wave_vector = tuple(numpy.float32(self.k0[i]) for i in range(3))
        common_tail = (numpy.float32(self.gamma),
                       numpy.float32(self.delta0),
                       numpy.float32(self.S),
                       numpy.float32(dt),
                       numpy.int32(xd.size),
                       nbars.data)
        # Identity test: ``== None`` misbehaves for array-like values.
        if self.sigma is None:
            self.program.compute_mean_scattered_photons_homogeneous_beam(
                    self.queue, global_size, None,
                    *(state + wave_vector + common_tail))
        else:
            # The Gaussian kernel additionally takes the beam centre and width.
            beam = tuple(numpy.float32(self.x0[i]) for i in range(3))
            beam += (numpy.float32(self.sigma), )
            self.program.compute_mean_scattered_photons_gaussian_beam(
                    self.queue, global_size, None,
                    *(state + wave_vector + beam + common_tail))

        # Compute scattered photons and associated recoil kicks
        nMax = int(math.ceil(10.0 * self.S *
                    (self.gamma / 2.0 / numpy.pi) * dt))
        actualNs = self.findSample(nbars, nMax)
        recoilDirectionsD = cl_array.Array(self.queue,
                [nbars.size, nMax, 3], dtype=numpy.float32)
        self.generator.fill_normal(recoilDirectionsD)

        # apply recoil kicks to particles
        recoilMomentum = numpy.linalg.norm(self.k0) * self._PlanckConstantReduced
        kick_args = (md.data,
                     actualNs.data,
                     numpy.int32(nMax),
                     recoilDirectionsD.data) + wave_vector + (
                     numpy.float32(recoilMomentum),
                     numpy.float32(dt),
                     axd.data, ayd.data, azd.data,
                     numpy.int32(xd.shape[0]))
        self.program.computeKicks(self.queue, global_size, None, *kick_args)
Esempio n. 46
0
# Image height and width come from the first two axes of the host image.
h, w = datal.shape[:2]

# Disabled experiments that perturbed the input with random noise.
#datal += np.random.rand(datal.size).reshape(datal.shape)*(1.0-datal)*0.5

#idxdark = datal<0.5
#idxlight = np.min(datal, axis=2)>0.5
#rnd = np.random.rand(datal[idxlight].size//3)*0.25
#datal[idxlight] *= 0.75
#datal[idxlight] += np.array(3*[rnd]).T#.reshape(-1, datal.shape[-1])
#datal[idxdark] += np.random.rand(datal[idxdark].size)*0.25

# Upload the image as float32.
datalcl = arr_from_np(queue, datal.astype(np.float32))



# Output image (same layout as the input) and a per-pixel uint32 buffer.
res = clarray.zeros_like(datalcl)
gminiscl = clarray.zeros(dtype=np.uint32, shape=datalcl.shape[:2], queue=queue)


# Render the kernel source from the template and print it for inspection.
ksource = tpl.render(rads=rads, w=w, allc=allc, allct=allct, n=nn, dtype='float', crds=allcircle, numc=3)
print(ksource)
#exit()

# Build and launch; the global size leaves out 2*rr rows and columns.
program = cl.Program(ctx, ksource).build()
program.filter(queue, (h-2*rr, w-2*rr,), None, datalcl.ravel().data, res.data, gminiscl.data)

# Read the result back and scale it by 255 into uint8.
resint = np.round(res.get()*255).astype(np.uint8)

import tkinter as tk
from PIL import ImageDraw, Image, ImageTk
import sys
Esempio n. 47
0
 def zeros_like(self, a):
     """Allocate a zeroed device array like ``a`` and record it.

     Args:
         a: Template array handed to ``cl_array.zeros_like``.

     Returns:
         The new array, which is also appended to ``self._cl_arrays``.
     """
     tracked = cl_array.zeros_like(a)
     self._cl_arrays.append(tracked)
     return tracked