예제 #1
0
def test_sum(ctx_factory):
    """Compare ``cl_array.sum`` with ``np.sum`` for several dtypes and slices.

    Two device-side forms are checked: summing a sliced view of the array and
    passing the slice through the ``slice=`` keyword.  Slices with a step are
    only exercised through the keyword form.
    """
    from pytest import importorskip
    importorskip("mako")

    queue = cl.CommandQueue(ctx_factory())

    n = 200000
    rel_tol = 1e-4
    slices = [
        slice(None),
        slice(1000, 3000),
        slice(1000, -3000),
        slice(1000, None),
        slice(1000, None, 3),
        slice(1000, 1000),
    ]

    for dtype in [np.float32, np.complex64]:
        dev_ary = general_clrand(queue, (n,), dtype)
        host_ary = dev_ary.get()

        for slc in slices:
            expected = np.sum(host_ary[slc])

            # A zero reference sum (e.g. the empty slice) cannot be used as a
            # divisor; compare absolutely in that case.
            ref_divisor = abs(expected) if expected else 1

            if slc.step is None:
                via_view = cl_array.sum(dev_ary[slc]).get()
                assert abs(via_view - expected) / ref_divisor < rel_tol

            via_keyword = cl_array.sum(dev_ary, slice=slc).get()
            assert abs(via_keyword - expected) / ref_divisor < rel_tol
예제 #2
0
def test_sum(ctx_factory):
    """Compare ``cl_array.sum`` with ``np.sum`` for several dtypes and slices.

    The sliced-view form is only checked for slices without a step; the
    ``slice=`` keyword form is checked for every slice.
    """
    from pytest import importorskip
    importorskip("mako")

    queue = cl.CommandQueue(ctx_factory())

    n = 200000
    rel_tol = 1e-4
    slices = [
        slice(None),
        slice(1000, 3000),
        slice(1000, -3000),
        slice(1000, None),
        slice(1000, None, 3),
    ]

    for dtype in [np.float32, np.complex64]:
        dev_ary = general_clrand(queue, (n,), dtype)
        host_ary = dev_ary.get()

        for slc in slices:
            expected = np.sum(host_ary[slc])

            if slc.step is None:
                via_view = cl_array.sum(dev_ary[slc]).get()
                assert abs(via_view - expected) / abs(expected) < rel_tol

            via_keyword = cl_array.sum(dev_ary, slice=slc).get()
            assert abs(via_keyword - expected) / abs(expected) < rel_tol
예제 #3
0
    def _preserve_power(self, real, imag, speckles, stage):
        """Record ('in') or restore ('out') the speckle power outside the (0,0) term.

        stage == 'in':  stores the (0,0) component in ``self.m0_1``, builds the
        speckle pattern, stores ``self.power_in`` and replaces the (0,0)
        component of ``speckles``.
        stage == 'out': measures ``self.power_out`` the same way (using
        ``self.m0_2``), rescales ``real`` and ``imag`` by
        ``sqrt(power_in / power_out)`` and writes ``self.m0_1`` back as the
        (0,0) component.
        """
        assert stage in ('in','out'), "unrecognized _preserve_power stage %s"%stage

        one_item = (1,)

        if stage == 'in':
            # m0 is a float2 which stores the (0,0) of re and im.  The (0,0)
            # component is replaced with the average of the surrounding
            # neighbors to prevent envelope distortion due to non-zero
            # average magnetization.
            self.get_m0.execute(self.queue, one_item, real.data, imag.data, self.m0_1.data)
            self.make_speckle(real, imag, speckles)

            total_power = cla.sum(speckles).get()
            dc_term = self.m0_1.get()[0]
            self.power_in = total_power - dc_term**2

            self.replace_dc_component1.execute(
                self.queue, one_item, speckles.data, speckles.data, np.int32(self.N))

        elif stage == 'out':
            # Preserve the total amount of speckle power outside the (0,0)
            # component, then put m0_1 back as the (0,0) component so that the
            # average magnetization is not affected by the rescaling.
            self.get_m0.execute(self.queue, one_item, real.data, imag.data, self.m0_2.data)
            self.make_speckle(real, imag, speckles)

            total_power = cla.sum(speckles).get()
            dc_term = self.m0_2.get()[0]
            self.power_out = total_power - dc_term**2

            ratio = (np.sqrt(self.power_in/self.power_out)).astype(np.float32)
            for component in (real, imag):
                self.scalar_multiply(ratio, component)

            self.replace_dc_component2.execute(
                self.queue, one_item, real.data, imag.data, self.m0_1.data)
예제 #4
0
def sum(ary, backend=None):
    """Return the sum of ``ary.dev`` using the requested backend.

    ``backend`` defaults to ``ary.backend``.  Recognised values are
    ``'cython'`` (NumPy sum), ``'opencl'`` (PyOpenCL array sum) and ``'cuda'``
    (PyCUDA gpuarray sum).  Any other value falls through and returns ``None``.
    """
    chosen = ary.backend if backend is None else backend

    if chosen == 'cython':
        return np.sum(ary.dev)
    elif chosen == 'opencl':
        import pyopencl.array as gpuarray
        device_total = gpuarray.sum(ary.dev)
        return device_total.get()
    elif chosen == 'cuda':
        import pycuda.gpuarray as gpuarray
        device_total = gpuarray.sum(ary.dev)
        return device_total.get()
예제 #5
0
    def pol_vor_rho(self, Y, px, py):
        '''Return (polarization, vorticity, density) at the given Y, px, py.

        Runs the ``polarization_on_sf`` kernel with one work item per
        ``self.size_sf`` entry, waits for it, then sums the ``d_pol``,
        ``d_vor`` and ``d_rho`` device arrays.
        '''
        kernel_event = self.prg.polarization_on_sf(
            self.queue, (self.size_sf, ), None,
            self.d_pol.data, self.d_vor.data, self.d_rho.data,
            self.d_smu, self.d_umu, self.d_omegaY, self.d_etas,
            np.float32(Y), np.float32(px), np.float32(py),
            np.int32(self.size_sf))
        kernel_event.wait()

        totals = [cl_array.sum(buf).get()
                  for buf in (self.d_pol, self.d_vor, self.d_rho)]
        return totals[0], totals[1], totals[2]
예제 #6
0
    def get_divergence_error(vector):
        """Return (max, average) divergence of ``vector`` relative to its derivatives.

        Each of the three components is inverse-transformed into ``vector_x``,
        the divergence is written to ``div``, and it is normalised by the sum
        of the absolute values of the three partial derivatives stored in
        ``pdx``.
        """
        for mu in range(3):
            fft.idft(vector[mu], vector_x[mu])

        derivs.divergence(queue, vector_x, div)

        # One partial derivative per component: d/dx of component 0,
        # d/dy of component 1, d/dz of component 2.
        for mu, out_keyword in enumerate(("pdx", "pdy", "pdz")):
            derivs(queue, fx=vector_x[mu], **{out_keyword: pdx[mu]})

        abs_partials = [clm.fabs(pdx[mu]) for mu in range(3)]
        norm = sum(abs_partials)
        abs_div = clm.fabs(div)

        max_err = cla.max(abs_div) / cla.max(norm)
        avg_err = cla.sum(abs_div) / cla.sum(norm)
        return max_err, avg_err
예제 #7
0
    def get_total_energy_and_entropy_on_gpu(self, tau, d_ev):
        """Append the total energy and entropy at time ``tau`` to the history lists.

        Runs the ``total_energy_and_entropy`` kernel over the (NX, NY, NZ)
        grid, sums ``self.a_ed`` and ``self.a_entropy`` on the device and
        scales both by ``tau * DX * DY * DZ`` before appending them to
        ``self.energy`` and ``self.entropy``.
        """
        cfg = self.cfg
        grid = (cfg.NX, cfg.NY, cfg.NZ)

        kernel_event = self.kernel_bulk.total_energy_and_entropy(
            self.queue, grid, None,
            self.a_ed.data, self.a_entropy.data, d_ev, self.eos_table,
            np.float32(tau))
        kernel_event.wait()

        cell_volume = tau * cfg.DX * cfg.DY * cfg.DZ

        total_energy = cl_array.sum(self.a_ed).get() * cell_volume
        total_entropy = cl_array.sum(self.a_entropy).get() * cell_volume

        self.energy.append(total_energy)
        self.entropy.append(total_entropy)
    def __call__(self, im, nrays, nsamples, ray_step, seed_pt, cutoff, thresh):
        """Sample rays from ``seed_pt`` and return one boundary value per ray.

        Steps, in order:
        1. ``sample_rays`` fills ``arrays.scratch`` with a global size of
           (nsamples, nrays).
        2. The entries selected by ``arrays.idx`` are copied into
           ``arrays.pre_cutoff`` and squared into ``arrays.pre_cutoff_squared``.
        3. Mean and standard deviation of that region are computed on the host
           from the two device sums, each divided by ``cutoff * nrays``.
        4. ``scan_boundary`` runs once per ray with the threshold
           ``inside_std * thresh``.

        ``nrays`` and ``nsamples`` are coerced to ``int`` and ``cutoff`` to
        ``np.int32``.  ``im`` is passed to the kernel unchanged and
        ``ray_step`` is only forwarded to ``build_program``.

        Returns the host copy of ``arrays.result``.
        """

        nrays = int(nrays)
        nsamples = int(nsamples)
        cutoff = np.int32(cutoff)

        arrays = self.setup_arrays(nrays, nsamples, cutoff)

        prog = self.build_program(nrays, nsamples, ray_step)

        prog.sample_rays(self.queue,
                        (nsamples, nrays),
                        None,
                        arrays.scratch.data,
                        im,
                        np.float32(seed_pt[0]),
                        np.float32(seed_pt[1]))

        # take the region in the cutoff zone
        cla.take(arrays.scratch,
                 arrays.idx,
                 out=arrays.pre_cutoff)

        # debugging aid (disabled):
        # plt.imshow(self.pre_cutoff.get())
        # plt.show()
        self.square_array(arrays.pre_cutoff, arrays.pre_cutoff_squared)

        # std = sqrt(E[x^2] - E[x]^2) over the cutoff * nrays selected samples
        # NOTE(review): cutoff is an np.int32, so cutoff * nrays may be
        # evaluated in 32-bit arithmetic -- confirm sizes stay in range.
        inside_mean = cla.sum(arrays.pre_cutoff).get() / (cutoff * nrays)
        inside_sumsq = cla.sum(arrays.pre_cutoff_squared).get() / (cutoff * nrays)
        inside_std = np.sqrt(inside_sumsq - inside_mean ** 2)

        # threshold expressed in units of the inside-region standard deviation
        normed_thresh = inside_std * thresh

        prog.scan_boundary(self.queue,
                          (nrays,),
                          None,
                          arrays.result.data,
                          arrays.scratch.data,
                          np.float32(normed_thresh))

        # debugging aid (disabled):
        # print normed_thresh
        # plt.figure()
        # plt.hold(True)
        # plt.imshow(arrays.scratch.get())
        # plt.plot(np.arange(0, nrays), arrays.result.get())
        # plt.show()

        return arrays.result.get()
예제 #9
0
    def mean(t: Tensor) -> np.float32:
        """Return the arithmetic mean of all values in a tensor.

        GPU tensors are summed on the device and divided by the element
        count on the host; other tensors go through ``np.mean``.
        """

        if not t.gpu:
            return np.mean(t._data)

        device_data = t._data
        total = clarray.sum(device_data).get().flat[0]
        return total / device_data.size
예제 #10
0
    def sum(t: Tensor) -> np.float32:
        """Return the sum of all values in a tensor.

        GPU tensors are reduced on the device and the single result element
        is read back; other tensors go through ``np.sum``.
        """

        if not t.gpu:
            return np.sum(t._data)

        reduced = clarray.sum(t._data).get()
        return reduced.flat[0]
예제 #11
0
    def _calcResidual(self, step_out, tmp_results, step_in, data):
        """Compute the cost value and two residual norms for the current step.

        Reads ``tmp_results["DADA"]``, ``["DAd"]``, ``["gradx"]``,
        ``["gradFx"]`` and ``["Ax"]`` as well as ``step_out["x"]``;
        ``step_in`` is not used in this method.

        Returns
        -------
        tuple
            ``(costs, f_new, grad_f)`` where ``costs`` is
            ``datacost + L2Cost + regcost``, ``f_new`` is the norm of the
            vdot/log expression below and ``grad_f`` is the norm of
            ``tmp_results["gradFx"]``.
        """

        # <DADA, DAd> + sum(lambd * log(1 + <gradx, gradx>)), evaluated with
        # pyopencl array operations before being copied to the host.
        f_new = clarray.vdot(tmp_results["DADA"], tmp_results["DAd"]) + clarray.sum(
            self.lambd
            * clmath.log(1 + clarray.vdot(tmp_results["gradx"], tmp_results["gradx"]))
        )

        # TODO: calculate on GPU
        f_new = np.linalg.norm(f_new.get())

        grad_f = np.linalg.norm(tmp_results["gradFx"].get())

        # TODO: datacosts calculate or get from outside!!!!
        # datacost = 0  # self._fval_init
        # TODO: calculate on GPU
        # NOTE(review): unlike the other norms here, this operand is not
        # copied to the host with .get() first -- confirm that is intended.
        datacost = 2 * np.linalg.norm(tmp_results["Ax"] - data) ** 2
        # Earlier variants, kept for reference:
        # datacost = 2 * np.linalg.norm(data - b) ** 2
        # self._FT.FFT(b, clarray.to_device(
        #       self._queue[0], (self._step_val[:, None, ...] *
        #          self.par["C"]))).wait()
        # b = b.get()
        # datacost = 2 * np.linalg.norm(data - b) ** 2
        # TODO: calculate on GPU
        L2Cost = np.linalg.norm(step_out["x"].get()) / (2.0 * self.delta)
        # lambd * sum(|log(1 + <gradx, gradx>)|), summed on the host
        regcost = self.lambd * np.sum(
            np.abs(
                clmath.log(
                    1 + clarray.vdot(tmp_results["gradx"], tmp_results["gradx"])
                ).get()
            )
        )
        costs = datacost + L2Cost + regcost
        return costs, f_new, grad_f
예제 #12
0
def gs_mod_gpu(idata, itera=10, osize=256):
    """Iteratively compute a phase-only array for ``idata`` on the first GPU.

    The routine alternates inverse and forward FFTs (``plan.execute``) with
    the ``norm`` and ``norm1`` kernels built from ``KERNEL`` for ``itera``
    rounds, then returns ``exp(1j * angle(F))`` of the shifted final forward
    transform.

    Parameters
    ----------
    idata : 2-D array
        Input data.  It is indexed with two slices, so it must be 2-D.
    itera : int
        Number of iterations.
    osize : int
        Side length of the central square that is zeroed in the random
        phase mask (``cut = osize // 2`` on either side of the centre).

    Returns
    -------
    ndarray
        Complex array of unit modulus holding only the phase of the result.

    Notes
    -----
    NOTE(review): the masked square is centred on the hard-coded index 512,
    which presumably assumes a 1024x1024 ``idata`` -- confirm with callers.
    """
    cut = osize // 2

    pl = cl.get_platforms()[0]
    devices = pl.get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[devices[0]])
    queue = cl.CommandQueue(ctx)

    # The original (Spanish) note on this line read: does not work with
    # "complex128" -- yet complex128 is the dtype in use.
    # NOTE(review): confirm which statement is current.
    plan = Plan(idata.shape, queue=queue, dtype=complex128)

    src = str(
        Template(KERNEL).render(
            double_support=all(has_double_support(dev) for dev in devices),
            amd_double_support=all(
                has_amd_double_support(dev) for dev in devices)))
    prg = cl.Program(ctx, src).build()

    idata_gpu = cl_array.to_device(queue,
                                   ifftshift(idata).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    plan.execute(idata_gpu.data, fdata_gpu.data)

    # Random unit-modulus phase everywhere except the central square.
    mask = exp(2.j * pi * random(idata.shape))
    mask[512 - cut:512 + cut, 512 - cut:512 + cut] = 0

    idata_gpu = cl_array.to_device(
        queue,
        ifftshift(idata + mask).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    # Queue-first call, matching every other to_device call in this function
    # (the previous code passed ``ctx`` as an extra leading argument here).
    error_gpu = cl_array.to_device(queue,
                                   zeros(idata_gpu.shape).astype("double"))
    plan.execute(idata_gpu.data, fdata_gpu.data)

    # The kernel object and its scalar argument types do not change between
    # iterations, so set them up once.
    norm1 = prg.norm1
    norm1.set_scalar_arg_dtypes([None, None, None, int32])

    for _ in range(itera):
        prg.norm(queue, fdata_gpu.shape, None, fdata_gpu.data)
        plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)
        norm1(queue, rdata_gpu.shape, None, rdata_gpu.data, idata_gpu.data,
              error_gpu.data, int32(cut))

        # Residual of this iteration.  It is only computed, not acted upon:
        # the early-exit check that used to compare it with the previous
        # value is disabled.
        e = sqrt(cl_array.sum(error_gpu).get()) / (2 * cut)

        plan.execute(rdata_gpu.data, fdata_gpu.data)

    fdata = fdata_gpu.get()
    fdata = ifftshift(fdata)
    fdata = exp(1.j * angle(fdata))
    return fdata
예제 #13
0
파일: gs.py 프로젝트: ramezquitao/pyoptools
def gs_mod_gpu(idata, itera=10, osize=256):
    """Iteratively compute a phase-only array for ``idata`` on the first GPU.

    The routine alternates inverse and forward FFTs (``plan.execute``) with
    the ``norm`` and ``norm1`` kernels built from ``KERNEL`` for ``itera``
    rounds, then returns ``exp(1j * angle(F))`` of the shifted final forward
    transform.

    Parameters
    ----------
    idata : 2-D array
        Input data.  It is indexed with two slices, so it must be 2-D.
    itera : int
        Number of iterations.
    osize : int
        Side length of the central square that is zeroed in the random
        phase mask (``cut = osize // 2`` on either side of the centre).

    Returns
    -------
    ndarray
        Complex array of unit modulus holding only the phase of the result.

    Notes
    -----
    NOTE(review): the masked square is centred on the hard-coded index 512,
    which presumably assumes a 1024x1024 ``idata`` -- confirm with callers.
    """
    cut = osize // 2

    pl = cl.get_platforms()[0]
    devices = pl.get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[devices[0]])
    queue = cl.CommandQueue(ctx)

    # The original (Spanish) note on this line read: does not work with
    # "complex128" -- yet complex128 is the dtype in use.
    # NOTE(review): confirm which statement is current.
    plan = Plan(idata.shape, queue=queue, dtype=complex128)

    src = str(Template(KERNEL).render(
        double_support=all(
            has_double_support(dev) for dev in devices),
        amd_double_support=all(
            has_amd_double_support(dev) for dev in devices)
        ))
    prg = cl.Program(ctx, src).build()

    idata_gpu = cl_array.to_device(queue, ifftshift(idata).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    plan.execute(idata_gpu.data, fdata_gpu.data)

    # Random unit-modulus phase everywhere except the central square.
    mask = exp(2.j*pi*random(idata.shape))
    mask[512-cut:512+cut, 512-cut:512+cut] = 0

    idata_gpu = cl_array.to_device(queue, ifftshift(idata+mask).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    # Queue-first call, matching every other to_device call in this function
    # (the previous code passed ``ctx`` as an extra leading argument here).
    error_gpu = cl_array.to_device(queue, zeros(idata_gpu.shape).astype("double"))
    plan.execute(idata_gpu.data, fdata_gpu.data)

    # The kernel object and its scalar argument types do not change between
    # iterations, so set them up once.
    norm1 = prg.norm1
    norm1.set_scalar_arg_dtypes([None, None, None, int32])

    for _ in range(itera):
        prg.norm(queue, fdata_gpu.shape, None, fdata_gpu.data)
        plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)
        norm1(queue, rdata_gpu.shape, None, rdata_gpu.data, idata_gpu.data,
              error_gpu.data, int32(cut))

        # Residual of this iteration.  It is only computed, not acted upon:
        # the early-exit check that used to compare it with the previous
        # value is disabled.
        e = sqrt(cl_array.sum(error_gpu).get())/(2*cut)

        plan.execute(rdata_gpu.data, fdata_gpu.data)

    fdata = fdata_gpu.get()
    fdata = ifftshift(fdata)
    fdata = exp(1.j*angle(fdata))
    return fdata
예제 #14
0
    def get_divergence_errors(hij):
        """Return arrays of (max, average) relative divergence for rows 1..3 of ``hij``.

        For each ``i`` the three components ``hij[tensor_id(i, mu + 1)]`` are
        inverse-transformed into ``vector_x``; the divergence in ``div`` is
        then normalised by the summed absolute partial derivatives in ``pdx``.
        """
        max_errors, avg_errors = [], []

        for i in (1, 2, 3):
            for mu in range(3):
                fft.idft(hij[tensor_id(i, mu + 1)], vector_x[mu])

            derivs.divergence(queue, vector_x, div)

            # One partial derivative per component: d/dx of component 0,
            # d/dy of component 1, d/dz of component 2.
            for mu, out_keyword in enumerate(("pdx", "pdy", "pdz")):
                derivs(queue, fx=vector_x[mu], **{out_keyword: pdx[mu]})

            abs_partials = [clm.fabs(pdx[mu]) for mu in range(3)]
            norm = sum(abs_partials)
            abs_div = clm.fabs(div)

            max_errors.append(cla.max(abs_div) / cla.max(norm))
            avg_errors.append(cla.sum(abs_div) / cla.sum(norm))

        return np.array(max_errors), np.array(avg_errors)
예제 #15
0
    def minZerrKernSHG_gpu(self):
        """Return the five polynomial coefficients X0..X4 as a float64 array.

        Runs the ``minZerrSHG`` kernel, then the ``normEsig`` kernel, and
        divides the device sums of ``X0_cla`` .. ``X4_cla`` by
        ``max(Esig_t_tau_norm_cla) * N * N``.  The coefficient buffers are
        presumably filled by the first kernel (they are only passed to it
        here) -- verify against the kernel source.
        """
        programs = self.progs.progs

        minz_kernel = programs["minZerrSHG"].minZerrSHG
        minz_kernel.set_scalar_arg_dtypes((None,) * 8 + (np.int32,))
        minz_kernel.set_args(
            self.Esig_t_tau_p_cla.data,
            self.Et_cla.data,
            self.dZ_cla.data,
            self.X0_cla.data,
            self.X1_cla.data,
            self.X2_cla.data,
            self.X3_cla.data,
            self.X4_cla.data,
            self.N,
        )
        cl.enqueue_nd_range_kernel(self.q, minz_kernel, self.Et.shape, None).wait()

        norm_kernel = programs["normEsig"].normEsig
        norm_kernel.set_scalar_arg_dtypes((None, None, np.int32))
        norm_kernel.set_args(
            self.Esig_t_tau_p_cla.data, self.Esig_t_tau_norm_cla.data, self.N)
        cl.enqueue_nd_range_kernel(
            self.q, norm_kernel, self.Esig_t_tau_p.shape, None).wait()

        n_squared = self.N * self.N
        mx = cla.max(self.Esig_t_tau_norm_cla).get() * self.N * self.N

        # Host-side equivalent of mx, kept for reference:
        #     Esig_t_tau = self.Esig_t_tau_p_cla.get()
        #     mx = ((Esig_t_tau*Esig_t_tau.conj()).real).max() * self.N*self.N

        coefficient_buffers = (
            self.X0_cla, self.X1_cla, self.X2_cla, self.X3_cla, self.X4_cla)
        X0, X1, X2, X3, X4 = [
            cla.sum(buf, queue=self.q).get() / mx for buf in coefficient_buffers
        ]

        root.debug("X0=" + str(X0) + ", type " + str(type(X0)))

        root.debug(
            "Poly: " + str(X4) + " x^4 + " + str(X3) + " x^3 + "
            + str(X2) + " x^2 + " + str(X1) + " x + " + str(X0)
        )

        # Polynomial in dZ (expansion of differential)
        X = np.array([X0, X1, X2, X3, X4]).astype(np.double)

        root.debug("Esig_t_tau_p norm max: " + str(mx / n_squared))

        return X
예제 #16
0
    def _cl_count_complexes(self, weight):
        """Accumulate weighted complex counts and the weighted histogram.

        Adds ``sum(_cl_interspace) * weight`` (summed as float32) to
        ``_cl_tot_complex``, resets ``_cl_hist`` to zero, histograms
        ``_cl_red_interspace`` into it and adds ``weight`` times the
        histogram to ``_cl_consistent_complexes``; finally waits for the
        queue to finish.
        """
        kernels = self._cl_kernels

        # Count all sampled complexes
        sampled = cl_array.sum(self._cl_interspace, dtype=np.dtype(np.float32))
        self._cl_tot_complex += sampled * weight

        kernels.set_to_i32(np.int32(0), self._cl_hist)
        kernels.histogram(self.queue, self._cl_red_interspace, self._cl_hist)
        kernels.multiply_add(
            self._cl_hist, weight, self._cl_consistent_complexes)

        self.queue.finish()
예제 #17
0
 def _ggr_spa_error(self,spa):
     """Return |mean(spa_buffer) - goal_m| for the trial value ``spa``.

     The available spins are promoted by ``target * spa`` into
     ``spa_buffer``, which is then bounded in place; the average of the
     bounded buffer (sum divided by ``N2``) is compared with ``goal_m``.
     """
     promotion = self.target*spa
     self.ggr_promote_spins(self.domains,self.available,self.spa_buffer,promotion)
     self.bound(self.spa_buffer,self.spa_buffer)

     buffer_total = cla.sum(self.spa_buffer).get()
     buffer_average = buffer_total/self.N2
     return abs(buffer_average-self.goal_m)
예제 #18
0
파일: mynp.py 프로젝트: ixtel/neurolabcl
 def sum(*args, **kwargs):
     """Sum a device array, either completely or along ``axis``.

     When the first argument is 0-dimensional or no ``axis`` keyword is
     given, the whole array is reduced with ``clarray.sum`` and the result
     is converted to ``myclArray``.  Otherwise the call is forwarded to
     ``_sum`` with ``prg2load`` set to ``programs.sum``.
     """
     a = args[0]
     reduce_along_axis = a.ndim != 0 and 'axis' in kwargs

     if reduce_along_axis:
         kwargs['prg2load'] = programs.sum
         return _sum(*args, **kwargs)

     res = clarray.sum(a, queue=queue)
     if not isinstance(res, myclArray):
         # Re-brand the plain pyopencl result as the project's array type.
         res.__class__ = myclArray
         res.reinit()
     return res
예제 #19
0
파일: lab1.py 프로젝트: obask/CL
def alter_sum():
    """Sum 10**6 random float32 values on the device and on the host; print both.

    The two totals are printed on one line separated by a space.
    """
    ctx = cl_init()
    queue = cl.CommandQueue(ctx)

    n = 10**6
    a_gpu = cl_array.to_device(queue, np.random.randn(n).astype(np.float32))

    cl_sum = cl_array.sum(a_gpu).get()
    numpy_sum = np.sum(a_gpu.get())

    # Single-argument print: valid syntax on Python 3 and prints the same
    # text as the former ``print cl_sum, numpy_sum`` statement on Python 2.
    print("%s %s" % (cl_sum, numpy_sum))
예제 #20
0
def test_sum(ctx_getter):
    """Check that ``cl_array.sum`` of 200000 random float32 values matches ``np.sum``."""
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    dev_ary = clrand(context, queue, (200000,), np.float32)
    host_ary = dev_ary.get()

    expected = np.sum(host_ary)
    actual = cl_array.sum(dev_ary).get()

    relative_error = abs(actual-expected)/abs(expected)
    assert relative_error < 1e-4
예제 #21
0
    def radon_normest(queue, r_struct):
        """Estimate a norm by ten rounds of ``radon`` followed by ``radon_ad``.

        A random float32 image of shape ``r_struct[1]`` is repeatedly divided
        by its element sum and passed through ``radon`` and ``radon_ad``; the
        square root of the last element sum is returned.
        """
        host_img = require(random.randn(*r_struct[1]), float32, 'F')
        img = clarray.to_device(queue, host_img)
        sino = clarray.zeros(queue, r_struct[2], dtype=float32, order='F')

        # Initial forward call; the returned value is kept but not used.
        first_event = radon(sino, img, r_struct, wait_for=img.events)

        for _ in range(10):
            normsqr = float(clarray.sum(img).get())
            img /= normsqr

            forward_event = radon(sino, img, r_struct, wait_for=img.events)
            sino.add_event(forward_event)

            adjoint_event = radon_ad(img, sino, r_struct, wait_for=sino.events)
            img.add_event(adjoint_event)

        return sqrt(normsqr)
예제 #22
0
파일: lab1.py 프로젝트: spetz911/CL
def alter_sum():
    """Sum 10**6 random float32 values on the device and on the host; print both.

    The two totals are printed on one line separated by a space.
    """
    ctx = cl_init()
    queue = cl.CommandQueue(ctx)

    n = 10**6
    a_gpu = cl_array.to_device(
        queue, np.random.randn(n).astype(np.float32))

    cl_sum = cl_array.sum(a_gpu).get()
    numpy_sum = np.sum(a_gpu.get())

    # Single-argument print: valid syntax on Python 3 and prints the same
    # text as the former ``print cl_sum, numpy_sum`` statement on Python 2.
    print("%s %s" % (cl_sum, numpy_sum))
예제 #23
0
def test_sum(ctx_factory):
    """Check ``cl_array.sum`` against ``np.sum`` for float32 and complex64 data."""
    queue = cl.CommandQueue(ctx_factory())

    n = 200000
    rel_tol = 1e-4

    for dtype in (np.float32, np.complex64):
        dev_ary = general_clrand(queue, (n,), dtype)
        host_ary = dev_ary.get()

        expected = np.sum(host_ary)
        actual = cl_array.sum(dev_ary).get()

        relative_error = abs(actual - expected) / abs(expected)
        assert relative_error < rel_tol
예제 #24
0
def test_outoforderqueue_reductions(ctx_factory):
    """Run sum, dot and ``all`` reductions on an out-of-order command queue.

    The test is skipped when such a queue cannot be created.
    """
    context = ctx_factory()
    try:
        queue = cl.CommandQueue(
            context,
            properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
    except Exception:
        pytest.skip("out-of-order queue not available")

    # 0/1 values to avoid accumulated rounding error
    host = (np.random.rand(10**6) > 0.5).astype(np.dtype('float32'))
    host[800000] = 10  # all<5 looks true until near the end
    dev = cl_array.to_device(queue, host)

    b1 = cl_array.sum(dev).get()
    b2 = cl_array.dot(dev, 3 - dev).get()
    b3 = (dev < 5).all().get()

    assert b1 == host.sum()
    assert b2 == host.dot(3 - host)
    assert b3 == 0
예제 #25
0
 def check_convergence(self):
     """Update ``self.power`` and ``self.converged`` from the latest domain change.

     The difference between the previous domains (``self.incoming``) and the
     current ones (``self.domains``) is written to ``self.domain_diff``; its
     sum divided by ``self.N2`` is the convergence metric, which is also
     appended to ``self.powerlist``.
     """
     self.array_diff(self.incoming,self.domains,self.domain_diff)

     # Summed difference per unit area of the simulation.
     diff_total = cla.sum(self.domain_diff).get()
     self.power = diff_total/self.N2
     self.powerlist.append(self.power)

     # Convergence condition: at or below the threshold counts as converged.
     if self.power > self.converged_at:
         self.converged = False
     elif self.power <= self.converged_at:
         self.converged = True

     # Hand back a host copy of the domains once converged, if requested.
     snapshot_requested = 'converged' in self.returnables_list
     if snapshot_requested and self.converged:
         self.returnables['converged'] = self.domains.get()
예제 #26
0
    def test_sum(ctx_getter):
        """Check that ``cl_array.sum`` of 200000 random values matches ``numpy.sum``.

        The relative difference between the device and host sums must be
        below 1e-4.
        """
        context = ctx_getter()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        a_gpu = clrand(context, queue, (200000, ))
        a = a_gpu.get()

        sum_a = numpy.sum(a)

        # The former ``from pycuda.reduction import get_sum_kernel`` line was
        # removed: the name was never used, and the import made this OpenCL
        # test fail with ImportError whenever PyCUDA was not installed.
        sum_a_gpu = cl_array.sum(a_gpu).get()

        assert abs(sum_a_gpu - sum_a) / abs(sum_a) < 1e-4
예제 #27
0
    def test_sum(ctx_getter):
        """Check that ``cl_array.sum`` of 200000 random values matches ``numpy.sum``.

        The relative difference between the device and host sums must be
        below 1e-4.
        """
        context = ctx_getter()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        a_gpu = clrand(context, queue, (200000,))
        a = a_gpu.get()

        sum_a = numpy.sum(a)

        # The former ``from pycuda.reduction import get_sum_kernel`` line was
        # removed: the name was never used, and the import made this OpenCL
        # test fail with ImportError whenever PyCUDA was not installed.
        sum_a_gpu = cl_array.sum(a_gpu).get()

        assert abs(sum_a_gpu-sum_a)/abs(sum_a) < 1e-4
예제 #28
0
def sum(q, a, axis=None, out=None, keepdims=False):
    """Sum a device array over all elements or along one axis of a 2-D array.

    Parameters
    ----------
    q
        Command queue used for temporary arrays and the BLAS call.
    a
        Device array.  Axis reductions require a 2-D float32 or float64 array.
    axis : int or None
        ``None`` (or any value when ``a.ndim <= 1``) reduces over all
        elements with ``clarray.sum``.  Otherwise one of -2, -1, 0, 1.
    out
        Optional pre-allocated output; must have ``a``'s dtype and at least
        as many elements as the non-reduced axis.
    keepdims : bool
        Keep the reduced axis as a length-1 dimension in the result shape.

    Returns
    -------
    The reduced device array (``out`` when it was supplied).

    Raises
    ------
    ValueError
        If ``axis`` is outside ``[-2, 1]`` or ``a`` is not 2-D for an axis
        reduction.
    TypeError
        If ``a.dtype`` is neither float32 nor float64.
    """
    if axis is None or a.ndim <= 1:
        out_shape = (1, ) * a.ndim if keepdims else ()
        return clarray.sum(a).reshape(out_shape)

    # Validate before normalising: previously an axis below -2 (e.g. -3)
    # became -1 after ``axis += 2``, passed the ``axis > 1`` check and was
    # then silently treated as axis 1.
    if not -2 <= axis <= 1:
        raise ValueError('invalid axis')
    if a.ndim != 2:
        raise ValueError('axis reductions are only supported for 2-D arrays')
    if axis < 0:
        axis += 2

    if a.flags.c_contiguous:
        m, n = a.shape
        lda = a.shape[1]
        transA = True if axis == 0 else False
        sum_axis, out_axis = (m, n) if axis == 0 else (n, m)
    else:
        n, m = a.shape
        lda = a.shape[0]
        transA = False if axis == 0 else True
        sum_axis, out_axis = (n, m) if axis == 0 else (m, n)

    # The reduction is a matrix-vector product with a vector of ones.
    ones = clarray.empty(q, (sum_axis, ), a.dtype).fill(1.0)
    if keepdims:
        out_shape = (1, out_axis) if axis == 0 else (out_axis, 1)
    else:
        out_shape = (out_axis, )

    if out is None:
        out = clarray.zeros(q, out_shape, a.dtype)
    else:
        assert out.dtype == a.dtype
        assert out.size >= out_axis

    if a.dtype == np.float32:
        gemv = clblaswrap.sgemv
    elif a.dtype == np.float64:
        gemv = clblaswrap.dgemv
    else:
        raise TypeError('Unsupported array type: %s' % str(a.dtype))

    alpha = 1.0
    beta = 0.0

    ev = gemv(q, transA, m, n, alpha, a, lda, ones, 1, beta, out, 1)
    ev.wait()

    return out
예제 #29
0
    def find_contacts(self, predict=True):
        """Call the find_plane_contacts and find_contacts kernels.

        Assumes that cell_centers, cell_dirs, cell_lens, cell_rads,
        cell_sqs, cell_dcenters, cell_dlens, cell_dangs,
        sorted_ids, and sq_inds are current on the device.

        Calculates cell_n_cts, ct_frs, ct_tos, ct_dists, ct_pts,
        ct_norms, ct_reldists, and n_cts.

        If ``predict`` is true the ``pred_*`` centre, direction and length
        buffers are passed to the kernels; otherwise the current ones are.
        Both kernels are launched with one work item per cell and waited on
        in turn.  Afterwards ``self.n_cts`` is set to the device sum of the
        first ``n_cells`` entries of ``cell_n_cts_dev``.
        """
        # Choose which cell state the kernels read.
        if predict:
            centers = self.pred_cell_centers_dev
            dirs = self.pred_cell_dirs_dev
            lens = self.pred_cell_lens_dev
        else:
            centers = self.cell_centers_dev
            dirs = self.cell_dirs_dev
            lens = self.cell_lens_dev

        # First kernel: contacts against the planes.
        self.program.find_plane_contacts(
            self.queue, (self.n_cells, ), None, numpy.int32(self.max_cells),
            numpy.int32(self.max_contacts), numpy.int32(self.n_planes),
            self.plane_pts_dev.data, self.plane_norms_dev.data,
            self.plane_coeffs_dev.data, centers.data, dirs.data, lens.data,
            self.cell_rads_dev.data, self.cell_n_cts_dev.data,
            self.ct_frs_dev.data, self.ct_tos_dev.data, self.ct_dists_dev.data,
            self.ct_pts_dev.data, self.ct_norms_dev.data,
            self.ct_reldists_dev.data, self.ct_stiff_dev.data).wait()

        # Second kernel: it receives the same contact buffers plus the grid
        # bounds and sorting buffers, and additionally ct_overlap_dev.
        self.program.find_contacts(
            self.queue, (self.n_cells, ), None, numpy.int32(self.max_cells),
            numpy.int32(self.n_cells), numpy.int32(self.grid_x_min),
            numpy.int32(self.grid_x_max), numpy.int32(self.grid_y_min),
            numpy.int32(self.grid_y_max), numpy.int32(self.n_sqs),
            numpy.int32(self.max_contacts), centers.data, dirs.data, lens.data,
            self.cell_rads_dev.data, self.cell_sqs_dev.data,
            self.sorted_ids_dev.data, self.sq_inds_dev.data,
            self.cell_n_cts_dev.data, self.ct_frs_dev.data,
            self.ct_tos_dev.data, self.ct_dists_dev.data, self.ct_pts_dev.data,
            self.ct_norms_dev.data, self.ct_reldists_dev.data,
            self.ct_stiff_dev.data, self.ct_overlap_dev.data).wait()

        # Earlier host-side variant, kept for reference; its note read:
        # "set dtype to int32 so we don't overflow the int32 when summing".
        #self.n_cts = self.cell_n_cts_dev.get().sum(dtype=numpy.int32)
        # Current variant: total contact count summed on the device.
        self.n_cts = cl_array.sum(self.cell_n_cts_dev[0:self.n_cells]).get()
예제 #30
0
def test_sum(ctx_factory):
    """Check ``cl_array.sum`` of sliced device arrays against ``np.sum``."""
    from pytest import importorskip
    importorskip("mako")

    queue = cl.CommandQueue(ctx_factory())

    n = 200000
    rel_tol = 1e-4
    slices = [
        slice(None),
        slice(1000, 3000),
        slice(1000, -3000),
        slice(1000, None),
    ]

    for dtype in [np.float32, np.complex64]:
        dev_ary = general_clrand(queue, (n, ), dtype)
        host_ary = dev_ary.get()

        for slc in slices:
            expected = np.sum(host_ary[slc])
            actual = cl_array.sum(dev_ary[slc]).get()

            relative_error = abs(actual - expected) / abs(expected)
            assert relative_error < rel_tol
예제 #31
0
파일: mynp.py 프로젝트: ixtel/neurolabcl
def _sum(a, axis=None, dtype=None, out=None, prg2load=programs.sum):
    """Reduce device array ``a`` along ``axis`` (or completely when ``axis`` is None).

    Parameters
    ----------
    a
        Device array to reduce.
    axis : int or None
        Axis to reduce.  ``None`` reduces over all elements via
        ``clarray.sum``.  Negative values count from the last axis.
    dtype, out
        Accepted for signature compatibility; not used by this function.
    prg2load
        Factory called as ``prg2load(a.dtype, a.shape[axis])`` that returns a
        program exposing a ``misum`` kernel.

    Returns
    -------
    The reduced array: a ``myclArray`` for the full reduction, otherwise the
    array created by ``empty`` with the reduced axis removed.
    """
    if axis is None:
        res = clarray.sum(a, queue=queue)
        if not isinstance(res, myclArray):
            res.__class__ = myclArray
            res.reinit()
        return res

    # Normalise negative axes first.  Without this, -1 was cast to uint32
    # (4294967295) in the permutation below and the "already last axis"
    # shortcut was never taken.
    if axis < 0:
        axis += a.ndim

    olddims = np.array(a.shape, dtype=np.uint32)
    # Permutation that moves the target axis to the end.
    replaces = np.append(np.delete(np.arange(a.ndim), axis, 0), [axis], 0).astype(np.uint32)
    if axis != a.ndim-1:
        # Transpose first to shift the target axis to the end.
        clolddims = cl.Buffer(ctx, mf.READ_ONLY| mf.COPY_HOST_PTR, hostbuf=olddims)
        clreplaces = cl.Buffer(ctx, mf.READ_ONLY| mf.COPY_HOST_PTR, hostbuf=replaces)
        cltrresult = cl.Buffer(ctx, mf.READ_WRITE, a.nbytes)
        program = programs.transpose(a.dtype, a.ndim)
        program.mitransp(queue, (a.size,), None, clolddims, clreplaces, a.data, cltrresult)
    else:
        # Target axis is already last: no transpose needed.
        cltrresult = a.data
    program = prg2load(a.dtype, a.shape[axis])
    # Reduce over the last axis: one work item per output element.
    result = empty(tuple(olddims[replaces[:-1]]), a.dtype)
    program.misum(queue, (int(a.size//a.shape[axis]),), None, cltrresult, result.data)
    return result
예제 #32
0
    def _gpu_search(self):
        """Method that actually performs the exhaustive search on the GPU

        Loops over ``g['nrot']`` rotations.  For each one the kernels in
        ``g['k']`` are called in the order below, and the weighted sum of
        ``g['interspace']`` is added to the running total.  After the queue
        has finished, results are copied to the host and stored in
        ``self.data`` under ``'accessible_complexes'``,
        ``'accessible_interaction_space'`` and ``'violations'``.
        """

        # make shortcuts
        d = self.data
        g = self.gpu_data
        q = self.queue
        k = g['k']

        # initialize the total number of sampled complexes
        tot_complexes = cl_array.sum(g['interspace'], dtype=np.float32)

        # initialize time
        time0 = _time()

        # loop over all rotations
        for n in xrange(g['nrot']):

            # rotate the scanning chain object
            k.rotate_image3d(q, g['sampler'], g['im_lsurf'], self.rotations[n],
                             g['lsurf'], d['im_center'])

            # perform the FFTs and calculate the clashing and interaction volume
            k.rfftn(q, g['lsurf'], g['ft_lsurf'])
            k.c_conj_multiply(q, g['ft_lsurf'], g['ft_rcore'],
                              g['ft_clashvol'])
            k.irfftn(q, g['ft_clashvol'], g['clashvol'])

            k.c_conj_multiply(q, g['ft_lsurf'], g['ft_rsurf'],
                              g['ft_intervol'])
            k.irfftn(q, g['ft_intervol'], g['intervol'])

            # determine at every position if the conformation is a proper complex
            k.touch(q, g['clashvol'], g['max_clash'], g['intervol'],
                    g['min_interaction'], g['interspace'])

            if self.distance_restraints:
                k.fill(q, g['restspace'], 0)

                # determine the space that is consistent with a number of
                # distance restraints
                k.distance_restraint(q, g['restraints'], self.rotations[n],
                                     g['restspace'])

                # get the accessible interaction space also consistent with a
                # certain number of distance restraints
                k.multiply(q, g['restspace'], g['interspace'],
                           g['access_interspace'])

            # calculate the total number of complexes, while taking into
            # account orientational/rotational bias
            tot_complexes += cl_array.sum(g['interspace'],
                                          dtype=np.float32) * np.float32(
                                              self.weights[n])

            # take at every position in space the maximum number of consistent
            # restraints for later visualization
            cl_array.maximum(g['best_access_interspace'],
                             g['access_interspace'],
                             g['best_access_interspace'])

            # calculate the number of accessible complexes consistent with
            # EXACTLY N distance restraints
            k.histogram(q, g['access_interspace'], g['subhists'],
                        self.weights[n], d['nrestraints'])

            # Count the violations of each restraint for all complexes
            # consistent with EXACTLY N restraints
            k.count_violations(q, g['restraints'], self.rotations[n],
                               g['access_interspace'], g['viol_counter'],
                               self.weights[n])

            # inform user (only when stdout is a terminal)
            if _stdout.isatty():
                self._print_progress(n, g['nrot'], time0)

        # wait for calculations to finish
        self.queue.finish()

        # transfer the data from GPU to CPU
        # get the number of accessible complexes and reduce the subhistograms
        # to the final histogram
        access_complexes = g['subhists'].get().sum(axis=0)
        # account for the fact that we are counting the number of accessible
        # complexes consistent with EXACTLY N restraints: bin 0 is the total
        # minus all other bins
        access_complexes[0] = tot_complexes.get() - sum(access_complexes[1:])
        d['accessible_complexes'] = access_complexes
        d['accessible_interaction_space'] = g['best_access_interspace'].get()

        # get the violation submatrices and reduce it to the final violation
        # matrix
        d['violations'] = g['viol_counter'].get().sum(axis=0)
    def decode_OpenCL_belief_propagation(self, received_blocks,buffer_in=False,return_buffer=False):
        """Run the OpenCL message-passing kernels until the syndrome sum is zero.

        Parameters
        ----------
        received_blocks
            Channel values.  With ``buffer_in=False`` they are converted to
            float64 and uploaded; with ``buffer_in=True`` the object is used
            directly as the channel-values device buffer.
        buffer_in : bool
            Treat ``received_blocks`` as an existing device buffer.
        return_buffer : bool
            Return the output device buffer instead of a host copy.

        Returns
        -------
        The variable-node output, as a device array if ``return_buffer`` is
        true, otherwise as the host array obtained with ``.get()``.

        Notes
        -----
        ``i_num`` starts at 1 and the loop runs while ``i_num < self.imax``,
        so at most ``imax - 1`` iterations are executed.
        """
        # Set up OpenCL
        if buffer_in:
            channel_values_buffer = received_blocks
        else:
            channel_values_buffer = cl_array.to_device(self.queue,received_blocks.astype(np.float64))

        varnode_output_buffer = cl_array.empty(self.queue, received_blocks.shape, dtype=np.float64)


        # Initial launch of the send kernel, then wait for it to complete.
        self.send_prog(self.queue, received_blocks.shape, None,
                  channel_values_buffer.data,
                  self.inbox_memory_start_varnodes_buffer.data,
                  self.degree_varnode_nr_buffer.data,
                  self.target_memorycells_varnodes_buffer.data,
                  self.checknode_inbox_buffer.data)
        self.queue.finish()
        syndrome_zero = False
        i_num = 1

        while (i_num<self.imax) and (not syndrome_zero):

            # NOTE(review): local_size is assigned but never used below.
            local_size = None

            # Check-node update: reads the check-node inbox and is given the
            # variable-node inbox buffer.
            self.checknode_update_prog(self.queue, (self.degree_checknode_nr.shape[0], received_blocks[:,np.newaxis].shape[-1]), None,
                                   self.checknode_inbox_buffer.data,
                                   self.inbox_memory_start_checknodes_buffer.data,
                                   self.degree_checknode_nr_buffer.data,
                                   self.target_memorycells_checknodes_buffer.data,
                                   self.varnode_inbox_buffer.data)

            self.queue.finish()
            # Variable-node update: reads channel values and the
            # variable-node inbox and is given the check-node inbox buffer.
            self.varnode_update_prog(self.queue, received_blocks.shape , None,
                                channel_values_buffer.data,
                                self.varnode_inbox_buffer.data,
                                self.inbox_memory_start_varnodes_buffer.data,
                                self.degree_varnode_nr_buffer.data,
                                self.target_memorycells_varnodes_buffer.data,
                                self.checknode_inbox_buffer.data)

            # Syndrome kernel: is given self.syndrom_buffer, which is summed below.
            self.calc_syndrom_prog(self.queue, (self.degree_checknode_nr.shape[0], received_blocks[:,np.newaxis].shape[-1]), None,
                                      self.checknode_inbox_buffer.data,
                                      self.inbox_memory_start_checknodes_buffer.data,
                                      self.degree_checknode_nr_buffer.data,
                                      self.syndrom_buffer.data)


            # Stop iterating once the syndrome buffer sums to zero.
            if cl_array.sum(self.syndrom_buffer).get() == 0:
                syndrome_zero =True


            i_num += 1


        # Final kernel: is given varnode_output_buffer, which is returned below.
        self.varoutput_prog(self.queue, received_blocks.shape , None,
                            channel_values_buffer.data,
                            self.varnode_inbox_buffer.data,
                            self.inbox_memory_start_varnodes_buffer.data,
                            self.degree_varnode_nr_buffer.data,
                            varnode_output_buffer.data)
        self.queue.finish()
        if return_buffer:
            return varnode_output_buffer
        else:
            output_values = varnode_output_buffer.get()
            return output_values
예제 #34
0
파일: math_.py 프로젝트: sehlstrom/compas
def sum_cl(queue, a, axis=None):
    """ Sum of GPUArray elements in a given axis direction or all elements.

    Parameters
    ----------
    queue
        PyOpenCL queue.
    a : gpuarray
        GPUArray with elements to be operated on.  It is unpacked as
        ``m, n = a.shape`` when ``axis`` is given, so it must be
        two-dimensional in that case.  The kernels read it through a
        ``float`` pointer (assumes float32 data -- TODO confirm with callers).
    axis : int
        Axis direction to sum through (0 or 1), all if None.

    Returns
    -------
    gpuarray
        GPUArray sum.  Shape ``(1, n)`` for ``axis=0`` and ``(m, 1)`` for
        ``axis=1`` (both float32); the result of ``cl_array.sum(a)`` when
        ``axis`` is None.

    Raises
    ------
    ValueError
        If ``axis`` is not None, 0 or 1.

    Notes
    -----
    - This is temporary and not an efficient implementation.
    - The OpenCL program is rebuilt on every call with an axis.
    - The kernels are launched with local sizes ``(1, m)`` / ``(n, 1)``, i.e.
      the reduced dimension is handled by a single work-group.

    """

    if axis is not None:

        # Reject unsupported axes before touching the device.  Previously an
        # axis other than 0 or 1 skipped both dispatch branches below and
        # failed with an UnboundLocalError on ``return b``.
        if axis not in (0, 1):
            raise ValueError("axis must be None, 0 or 1, got %r" % (axis,))

        m, n = a.shape

        kernel = cl.Program(queue.context, """

        __kernel void sum0_cl(__global float *a, __global float *b, unsigned m, unsigned n)
        {
            int bid = get_group_id(0);
            int tid = get_local_id(1);
            int id  = get_global_id(1) * n + get_global_id(0);
            int stride = 0;

            __local float sum[32000 / sizeof(float)];
            sum[tid] = a[id];
            sum[m] = 0.;

            for (stride = 1; stride < m; stride *= 2)
            {
                barrier(CLK_LOCAL_MEM_FENCE);
                if (tid % (2 * stride) == 0)
                {
                    sum[tid] += sum[tid + stride];
                }
            }

            b[bid] = sum[0];
        }

        __kernel void sum1_cl(__global float *a, __global float *b, unsigned m, unsigned n)
        {
            int bid = get_group_id(1);
            int tid = get_local_id(0);
            int id  = get_global_id(1) * n + get_global_id(0);
            int stride = 0;

            __local float sum[32000 / sizeof(float)];
            sum[tid] = a[id];
            sum[n] = 0.;

            for (stride = 1; stride < n; stride *= 2)
            {
                barrier(CLK_LOCAL_MEM_FENCE);
                if (tid % (2 * stride) == 0)
                {
                    sum[tid] += sum[tid + stride];
                }
            }

            b[bid] = sum[0];
        }

        """).build()

        if axis == 0:
            # One work-group per column; result is a single row.
            b = cl_array.empty(queue, (1, n), dtype=float32)
            kernel.sum0_cl(queue, (n, m), (1, m), a.data, b.data, uint32(m), uint32(n))
        else:
            # One work-group per row; result is a single column.
            b = cl_array.empty(queue, (m, 1), dtype=float32)
            kernel.sum1_cl(queue, (n, m), (n, 1), a.data, b.data, uint32(m), uint32(n))

        return b

    else:
        return cl_array.sum(a)
예제 #35
0
 def _evaluate(self, valuation, cache):
     """Return the float32 mean of the first operand, memoised in *cache*.

     The result is stored under ``id(self)``; when that key is already
     present the stored value is returned without re-evaluating anything.
     Otherwise the first entry of ``self.ops`` is evaluated with the same
     ``valuation`` and ``cache``, summed on the device as float32 and
     divided by its element count.
     """
     key = id(self)
     if key in cache:
         return cache[key]

     operand = self.ops[0]._evaluate(valuation, cache)
     total = clarray.sum(operand, dtype=np.dtype('float32'))
     cache[key] = total / np.float32(operand.size)
     return cache[key]
def spec(filename, extra):
        """Compute class boundaries and box-dimension estimates for an image.

        Two OpenCL kernels are built and run against the module-level
        ``ctx`` / ``queue``: ``measure`` fills a per-pixel "alpha" image from
        local grey-level histograms, and ``krnl`` counts, per class and per
        window size, how many value levels each block spans.  A straight-line
        fit over the window sizes then yields one ``falpha`` value per class.

        Parameters
        ----------
        filename
            Passed to ``Image.open`` when ``extra[1]`` is true; otherwise used
            directly as the image object.  When ``extra[2]`` is true,
            ``filename.getdata()`` is called on it.
        extra
            Indexable options: ``extra[0]`` number of classes (``cuantas``),
            ``extra[1]`` OPEN_IMAGE flag, ``extra[2]`` CONVERT flag,
            ``extra[3]`` RESHAPE flag.

        Returns
        -------
        ``np.hstack((clases, falpha))`` -- the class values followed by the
        fitted value for each class.

        NOTE(review): several calls pass the result of ``map`` straight to
        NumPy (``np.log``, ``np.array``, ``np.polyfit``); this presumably
        relies on Python 2 ``map`` returning a list -- confirm before running
        under Python 3.
        """
        cuantas = extra[0]
        OPEN_IMAGE = extra[1]
        if(OPEN_IMAGE==True):
            a = Image.open(filename)
            Nx, Ny = a.size
        else: # np array
            a = filename
            #Nx, Ny = a.shape
            # NOTE(review): unpacking ``a.size`` needs a 2-tuple; the "np array"
            # remark above suggests ``a.shape`` may have been intended -- confirm.
            Nx, Ny = a.size
        # NOTE(review): L, points, measure and AA below are assigned but not
        # referenced again in this function.
        L = Nx*Ny

        points = []     # number of elements in the structure
        RESHAPE = extra[3]
        CONVERT = extra[2]
        if(CONVERT == True):
            #gray = a.convert('L') # rgb 2 gray
            #arr = np.array(gray.getdata()).astype(np.int32)
            # Pixel data is read from ``filename`` itself here, not from ``a``.
            arr = np.array(filename.getdata()).astype(np.int32)
        else:
            if(RESHAPE == True): # ARGHH
                # Flatten the 2-D image into one row-major vector.
                arr = np.array(a).reshape(a.shape[0]*a.shape[1])
            else:
                arr = a

        alphaIm = np.zeros((Nx,Ny), dtype=np.float32 ) # Nx rows x Ny columns

        l = 4 # (maximum window size-1) / 2
        temp = map(lambda i: 2*i+1, range(l))
        temp = np.log(temp)
        measure = np.zeros(l*Ny).astype(np.int32)

        b = np.vstack((temp,np.ones((1,l)))).T
        AA=coo_matrix(np.kron(np.identity(Ny), b))     

        # Kernel 1: one work-item per pixel (i, j); builds a 256-bin histogram
        # of the clamped window around the pixel and writes sum(p*log2(p+1e-4)).
        prg = cl.Program(ctx, """
        __kernel void measure(__global float *alphaIm, __global int *img, const int Nx,
                                const int Ny, const int size) {
             int i = get_global_id(0);
             int j = get_global_id(1);

             // make histogram of region
             int hist[256];
             int t;
             for(t = 0; t < 256; t++) hist[t] = 0;
             int xi = max(i-size,0);
             int yi = max(j-size,0);
             int xf = min(i+size,Nx-1);
             int yf = min(j+size,Ny-1);
             int u , v;
             for(int u = xi; u <= xf; u++)
                 for(int v = yi; v <= yf; v++)
                    hist[img[u*Ny+v]]++;
             float res = 0;
             int s;
             float total = (yf-yi)*(xf-xi); // size of region
             for(s = 0; s <= 255; s++) {
                 float v = hist[s]/total; // probability
                 res += v*log2(v+0.0001);
             }

             alphaIm[i*Ny+j] = res;
            
        }
        """).build()

        #d = measure.shape[0]/2
        #ms = measure[0:l*d]
        img_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=arr)
        alphaIm_buf = cl.Buffer(ctx, mf.WRITE_ONLY, alphaIm.nbytes)
        sh = alphaIm.shape

        size = 8 # Window size
        prg.measure(queue, sh, None, alphaIm_buf, img_buf, np.int32(Nx), np.int32(Ny), np.int32(size))
        # Blocking read-back of the alpha image into the host array.
        cl.enqueue_read_buffer(queue,alphaIm_buf,alphaIm).wait()

        maxim = np.max(alphaIm)
        minim = np.min(alphaIm)
        #print maxim, minim

        import matplotlib
        from matplotlib import pyplot as plt
        # Alpha image
        #plt.imshow(alphaIm, cmap=matplotlib.cm.gray)
        #plt.show()
        #return

        # Split [minim, maxim) into ``cuantas`` equally spaced class values.
        paso = (maxim-minim)/cuantas
        if(paso <= 0):
            # the alpha image is monofractal
            clases = np.array(map(lambda i: i+minim,np.zeros(cuantas))).astype(np.float32)
        else:
            clases = np.arange(minim,maxim,paso).astype(np.float32)


        # Window
        cant = int(np.floor(np.log(Nx)))

        # concatenate the image A as [[A,A],[A,A]]
        hs = np.hstack((alphaIm,alphaIm))
        alphaIm = np.vstack((hs,hs))

        # Kernel 2: one work-item per block (i, j) of side ``sizeBlocks``; finds
        # the min/max alpha inside the block restricted to class ``c`` and
        # stores the number of quantised levels between them in ``flag``.
        prg = cl.Program(ctx, """
            __kernel void krnl(__global int *flag, __global float *clases, 
                               __global float* alphaIm,const int sizeBlocks, const int Ny,
                                const int numBlocks_y, const int c, const int cuantas,
                                float minim, float maxim) {
                int i = get_global_id(0);
                int j = get_global_id(1);
                int xi = i*sizeBlocks;
                int xf = (i+1)*sizeBlocks-1;
                int yi = j*sizeBlocks;
                int yf = (j+1)*sizeBlocks-1;

                // calculate max and min for this subregion
                float maxx;
                float minn;
                int w,t;
                int first = 0;
                for(w = xi; w < xf; w++) {
                    for(t = yi; t < yf; t++) {
                        float v = alphaIm[w*Ny*2 + t];
                        if (v >= clases[c] and v <= clases[c+1]) {
                            if(!first) { first = 1; maxx = minn = v; }
                            if(v > maxx) maxx = v;
                            if(v < minn) minn = v;
                        }
                    }
                }
                float totalDif = maxim - minim;
                int nB = numBlocks_y; // num of subdivisions in the Z coordinate
                int l = floor(((maxx-minim)/totalDif)*nB)+1;
                int k = floor(((minn-minim)/totalDif)*nB)+1;
                flag[i*numBlocks_y + j] = l-k+1;
            }
        """).build()  

        # Multifractal dimentions
        falpha = np.zeros(cuantas).astype(np.float32)
        clases_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=clases.astype(np.float32))
        alphaIm_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=alphaIm.astype(np.float32))
        for c in range(cuantas):
            # N[k-1] holds the summed block counts for window size 2*k+1.
            N = np.zeros(cant+1)
            # window sizes
            for k in range(1,cant+2):
                sizeBlocks = 2*k+1
                numBlocks_x = int(np.floor(Nx/sizeBlocks))
                numBlocks_y = int(np.floor(Ny/sizeBlocks))

                flag = np.zeros((numBlocks_x,numBlocks_y)).astype(np.int32)
                flag_buf = cl.Buffer(ctx, mf.WRITE_ONLY, flag.nbytes)
                sh = flag.shape            

                prg.krnl(queue, sh, None, flag_buf, clases_buf, alphaIm_buf, np.int32(sizeBlocks), np.int32(Ny), np.int32(numBlocks_y), np.int32(c), np.int32(cuantas), np.float32(minim), np.float32(maxim))
                cl.enqueue_read_buffer(queue, flag_buf, flag).wait()
                # Re-upload the flags and reduce them on the device.
                N[k-1] = cla.sum(cla.to_device(queue,flag)).get()

            #print N
            # Haussdorf (box) dimention of the alpha distribution
            # Negated slope of log(N+1) against log(window size).
            falpha[c] = -np.polyfit(map(lambda i: np.log((2*i+1)),range(1,cant+2)),np.log(map(lambda i: i+1,N)),1)[0]
        s = np.hstack((clases,falpha))
        return s
예제 #37
0
    def one_iteration(self,iteration):
        """Advance the simulation by one cycle.

        Steps, in order: back up the current domains into ``self.incoming``,
        locate the domain walls, apply the Ising bias, rescale and bound the
        domains, optionally restrict the update to the walls, and -- once
        ``iteration`` exceeds ``self.m_turnon`` -- promote spins in the
        available wall sites to move the net magnetization towards
        ``self.goal_m``.

        Intermediate arrays are copied to ``self.returnables`` for every key
        listed in ``self.returnables_list``.

        Parameters
        ----------
        iteration : int
            Index of the current cycle; compared against ``self.m_turnon``.

        Raises
        ------
        AssertionError
            If no domains or no goal envelope have been set.
        """
        # iterate through one cycle of the simulation.
        assert self.can_has_domains, "no domains set!"
        assert self.can_has_envelope, "no goal envelope set!"

        # first, copy the current state of the domain pattern to a holding buffer ("incoming")
        self.copy(self.domains,self.incoming)

        # now find the domain walls. modifications to the domain pattern due to rescaling only take place in the walls
        self.findwalls.execute(self.queue,(self.N,self.N),self.domains.data,self.allwalls.data,self.poswalls.data,self.negwalls.data,np.int32(self.N))

        if 'walls1'     in self.returnables_list: self.returnables['walls1']     = self.allwalls.get()
        if 'pos_walls1' in self.returnables_list: self.returnables['pos_walls1'] = self.poswalls.get()
        if 'neg_walls1' in self.returnables_list: self.returnables['neg_walls1'] = self.negwalls.get()

        # run the ising bias
        self.ising(self.domains,self.alpha)

        # rescale the domains. this operates on the class variables so no arguments are passed. self.domains stores the rescaled real-valued domains. the rescaled
        # domains are bounded to the range +1 -1.
        self._rescale_speckle()
        self.bound(self.domains,self.domains)
        if 'bounded' in self.returnables_list: self.returnables['bounded'] = self.domains.get()

        # if making an ordering island, this is the command that enforces the border condition
        #if use_boundary and n > boundary_turn_on: self.enforce_boundary(self.domains,self.boundary,self.boundary_values)

        # so now we have self.incoming (old domains) and self.domains (rescaled domains). we want to use self.walls to enforce changes to the domain pattern
        # from rescaling only within the walls. because updating can change wall location, also refind the walls.
        if self.only_walls:
            self.update_domains(self.domains,self.incoming,self.allwalls)
            self.findwalls.execute(self.queue,(self.N,self.N),self.domains.data,self.allwalls.data,self.poswalls.data,self.negwalls.data,np.int32(self.N))

        if iteration > self.m_turnon:
            self.findwalls.execute(self.queue,(self.N,self.N),self.domains.data,self.allwalls.data,self.poswalls.data,self.negwalls.data,np.int32(self.N))

            if 'walls2'     in self.returnables_list: self.returnables['walls2']     = self.allwalls.get()
            if 'pos_walls2' in self.returnables_list: self.returnables['pos_walls2'] = self.poswalls.get()
            if 'neg_walls2' in self.returnables_list: self.returnables['neg_walls2'] = self.negwalls.get()

            # now attempt to adjust the net magnetization in real space to achieve the target magnetization.
            net_m = cla.sum(self.domains).get()
            needed_m = self.goal_m-net_m

            # spin-per-available-site increment; stays None when no adjustment
            # is needed (needed_m is exactly zero, or not comparable such as
            # NaN). Previously that case reached promote_spins with ``spa``
            # unassigned and raised UnboundLocalError.
            spa = None

            if needed_m > 0:
                # raise the magnetization: promote sites in the negative walls, capped at +spa_max
                self.make_available1(self.available,self.negwalls,self.negpins,self.pospins)
                sites = cla.sum(self.available).get()
                spa = min([self.spa_max,needed_m/sites])

            elif needed_m < 0:
                # lower the magnetization: promote sites in the positive walls, capped at -spa_max
                self.make_available1(self.available,self.poswalls,self.negpins,self.pospins)
                sites = cla.sum(self.available).get()
                spa = max([-1*self.spa_max,needed_m/sites])

            if spa is not None:
                self.promote_spins(self.domains,self.available,spa)

            if 'promoted' in self.returnables_list: self.returnables['promoted'] = self.domains.get()

            self.bound(self.domains,self.domains)

        if 'domains' in self.returnables_list: self.returnables['domains'] = self.domains.get()
예제 #38
0
파일: addtest.py 프로젝트: sumeesha/opencl
# ge = p.globalError(4,4,x)
# print ge[0]
# # a = []
# # print numpy.sum(x[0])
# # print time.clock() - start

import pyopencl as cl  # Import the OpenCL GPU computing API
import pyopencl.array as pycl_array  # Import PyOpenCL Array (a Numpy array plus an OpenCL buffer object)
import numpy as np  # Import Numpy number tools

# Set up the OpenCL context and a command queue on it.
context = cl.create_some_context()
queue = cl.CommandQueue(context)

# 3920 x 100 float32 matrix living on the device; its rows are summed below.
x = pycl_array.to_device(
    queue, np.random.rand(3920, 100).astype(np.float32))
# Host-side matrix of the same shape (only used by the NumPy variant of the
# loop, which is currently disabled).
y = np.random.rand(3920, 100).astype(np.float32)

# Time one device-side sum per row; each entry of d is a device array.
start = time.clock()
d = [pycl_array.sum(x[i]) for i in range(3920)]

print(time.clock() - start)

# print("x: {}".format(x))
# print("d: {}".format(d))
# Print all three arrays, to show sum() worked
    def decode_OpenCL(self,
                      received_blocks,
                      buffer_in=False,
                      return_buffer=False):
        """Run the iterative decoder kernels on the OpenCL device.

        The channel values are sent to the check nodes, a first check-node
        pass is run, and then variable-node update, check-node update and
        syndrome computation are repeated until the summed syndrome buffer
        is zero or the iteration counter reaches ``self.imax``.  A final
        variable-node output kernel writes the result.

        Parameters
        ----------
        received_blocks
            Channel values.  Cast to int32 and uploaded unless ``buffer_in``
            is true, in which case it is used as the device buffer directly.
            Its ``shape`` is the launch grid of the variable-node kernels.
        buffer_in : bool
            Treat ``received_blocks`` as an already-uploaded device array.
        return_buffer : bool
            Return the device output array instead of copying it to the host.

        Returns
        -------
        The int32 output as a device array (``return_buffer`` true) or as the
        result of its ``get()``.
        """
        # Set up OpenCL
        channel_values_buffer = (
            received_blocks if buffer_in
            else cl_array.to_device(self.queue,
                                    received_blocks.astype(np.int32)))

        varnode_grid = received_blocks.shape
        varnode_output_buffer = cl_array.empty(
            self.queue, varnode_grid, dtype=np.int32)

        self.send_prog(
            self.queue, varnode_grid, None,
            channel_values_buffer.data,
            self.inbox_memory_start_varnodes_buffer.data,
            self.degree_varnode_nr_buffer.data,
            self.target_memorycells_varnodes_buffer.data,
            self.checknode_inbox_buffer.data)

        # Launch grid shared by every check-node kernel below.
        checknode_grid = (self.degree_checknode_nr.shape[0],
                          received_blocks[:, np.newaxis].shape[-1])

        self.first_iter_prog(
            self.queue, checknode_grid, None,
            self.checknode_inbox_buffer.data,
            self.inbox_memory_start_checknodes_buffer.data,
            self.degree_checknode_nr_buffer.data,
            self.target_memorycells_checknodes_buffer.data,
            self.varnode_inbox_buffer.data,
            self.cardinality_T_channel,
            self.cardinality_T_decoder_ops,
            self.Trellis_checknode_vector_a_buffer.data)

        i_num = 1
        syndrome_zero = False

        while (i_num < self.imax) and (not syndrome_zero):
            iter_index = i_num - 1

            self.varnode_update_prog(
                self.queue, varnode_grid, None,
                channel_values_buffer.data,
                self.varnode_inbox_buffer.data,
                self.inbox_memory_start_varnodes_buffer.data,
                self.degree_varnode_nr_buffer.data,
                self.target_memorycells_varnodes_buffer.data,
                self.checknode_inbox_buffer.data,
                self.cardinality_T_channel,
                self.cardinality_T_decoder_ops,
                iter_index,
                self.Trellis_varnode_vector_a_buffer.data)

            self.checknode_update_prog(
                self.queue, checknode_grid, None,
                self.checknode_inbox_buffer.data,
                self.inbox_memory_start_checknodes_buffer.data,
                self.degree_checknode_nr_buffer.data,
                self.target_memorycells_checknodes_buffer.data,
                self.varnode_inbox_buffer.data,
                self.cardinality_T_channel,
                self.cardinality_T_decoder_ops,
                iter_index,
                self.Trellis_checknode_vector_a_buffer.data)

            self.calc_syndrom_prog(
                self.queue, checknode_grid, None,
                self.checknode_inbox_buffer.data,
                self.inbox_memory_start_checknodes_buffer.data,
                self.degree_checknode_nr_buffer.data,
                self.cardinality_T_decoder_ops,
                self.syndrom_buffer.data)

            # Stop iterating as soon as the summed syndrome is zero.
            syndrome_zero = bool(
                cl_array.sum(self.syndrom_buffer).get() == 0)

            i_num += 1

        self.varoutput_prog(
            self.queue, varnode_grid, None,
            channel_values_buffer.data,
            self.varnode_inbox_buffer.data,
            self.inbox_memory_start_varnodes_buffer.data,
            self.degree_varnode_nr_buffer.data,
            self.cardinality_T_channel,
            self.cardinality_T_decoder_ops,
            i_num - 1,
            self.Trellis_varnode_vector_a_buffer.data,
            varnode_output_buffer.data)
        self.queue.finish()

        if return_buffer:
            return varnode_output_buffer
        return varnode_output_buffer.get()
예제 #40
0
    def _gpu_init(self):
        """Allocate the device arrays, build kernels/FFT plans and run the
        initial transforms.

        Uploads the receptor/ligand grids, allocates working arrays of the
        grid shape (float32, int32 and complex64 half-spectrum variants),
        pads the restraint and selection coordinates to four columns before
        upload, prepares the kernel constants, constructs ``Kernels`` and the
        real FFT plans, and finally transforms ``rcore`` and ``rsurf``.
        All device work uses ``self.queue``.
        """

        q = self.queue

        # Move arrays to GPU
        self._cl_rcore = cl_array.to_device(q, self._rcore.astype(np.float32))
        self._cl_rsurf = cl_array.to_device(q, self._rsurf.astype(np.float32))
        self._cl_lcore = cl_array.to_device(q, self._lcore.astype(np.float32))

        # Make the rotations float16 arrays
        # Each rotation is flattened into the first 9 of 16 float32 slots; the
        # array stays on the host in this method.
        # NOTE(review): "float16" presumably means the 16-wide OpenCL float
        # vector type rather than half precision -- confirm.
        self._cl_rotations = np.zeros((self.rotations.shape[0], 16), dtype=np.float32)
        self._cl_rotations[:, :9] = self.rotations.reshape(-1, 9)

        # Allocate arrays
        # Float32
        self._cl_shape = tuple(self._shape)
        arr_names = 'rot_lcore clashvol intervol tmp'.split()
        for arr_name in arr_names:
            setattr(self, '_cl_' + arr_name, 
                    cl_array.zeros(q, self._cl_shape, dtype=np.float32)
                    )

        # Int32
        arr_names = 'interspace red_interspace restspace access_interspace'.split()
        for arr_name in arr_names:
            setattr(self, '_cl_' + arr_name, 
                    cl_array.zeros(q, self._cl_shape, dtype=np.int32)
                    )

        # Boolean
        # (stored as int32 arrays)
        arr_names = 'not_clashing interacting'.split()
        for arr_name in arr_names:
            setattr(self, '_cl_' + arr_name, 
                    cl_array.zeros(q, self._cl_shape, dtype=np.int32)
                    )

        # Complex64
        # Half-spectrum shape: first axis reduced to shape[0] // 2 + 1.
        self._ft_shape = tuple([self._shape[0] // 2 + 1] + list(self._shape)[1:])
        arr_names = 'lcore lcore_conj rcore rsurf tmp'.split()
        for arr_name in arr_names:
            setattr(self, '_cl_ft_' + arr_name, 
                    cl_array.empty(q, self._ft_shape, dtype=np.complex64)
                    )

        # Restraints arrays
        # Coordinates are padded from 3 to 4 columns on the host, then the
        # same attribute is rebound to the uploaded device array.
        self._cl_rrestraints = np.zeros((self._nrestraints, 4), dtype=np.float32)
        self._cl_rrestraints[:, :3] = self._rrestraints
        self._cl_rrestraints = cl_array.to_device(q, self._cl_rrestraints)
        self._cl_lrestraints = np.zeros((self._nrestraints, 4), dtype=np.float32)
        self._cl_lrestraints[:, :3] = self._lrestraints
        self._cl_lrestraints = cl_array.to_device(q, self._cl_lrestraints)
        self._cl_mindis = cl_array.to_device(q, self._mindis.astype(np.float32))
        self._cl_maxdis = cl_array.to_device(q, self._maxdis.astype(np.float32))
        # Squared distance bounds, computed on the host before upload.
        self._cl_mindis2 = cl_array.to_device(q, self._mindis.astype(np.float32) ** 2)
        self._cl_maxdis2 = cl_array.to_device(q, self._maxdis.astype(np.float32) ** 2)
        self._cl_rot_lrestraints = cl_array.zeros_like(self._cl_rrestraints)
        self._cl_restraints_center = cl_array.zeros_like(self._cl_rrestraints)

        # kernels
        # Constants handed to Kernels(); the two coordinate counts are
        # overwritten below when interaction analysis is enabled.
        self._kernel_constants = {'interaction_cutoff': 10, 
                            'nrestraints': self._nrestraints,
                            'shape_x': self._shape[2],
                            'shape_y': self._shape[1],
                            'shape_z': self._shape[0],
                            'llength': self._llength,
                            'nreceptor_coor': 0,
                            'nligand_coor': 0,
                            }

        # Counting arrays
        self._cl_hist = cl_array.zeros(self.queue, self._nrestraints, dtype=np.int32)
        self._cl_consistent_complexes = cl_array.zeros(self.queue,
                self._nrestraints, dtype=np.float32)
        self._cl_viol_hist = cl_array.zeros(self.queue, (self._nrestraints,
            self._nrestraints), dtype=np.int32)
        self._cl_violations = cl_array.zeros(self.queue, (self._nrestraints,
            self._nrestraints), dtype=np.float32)

        # Conversions
        self._cl_grid_max_clash = np.float32(self._grid_max_clash)
        self._cl_grid_min_interaction = np.float32(self._grid_min_interaction)
        self._CL_ZERO = np.int32(0)

        # Occupancy analysis
        # One float32 grid per restraint count from the cutoff up to
        # _nrestraints (inclusive); the dict stays empty when disabled.
        self._cl_occ_grid = {}
        if self.occupancy_analysis:
            for i in xrange(self.interaction_restraints_cutoff, self._nrestraints + 1):
                self._cl_occ_grid[i] = cl_array.zeros(self.queue,
                        self._cl_shape, dtype=np.float32)

        # Interaction analysis
        if self._interaction_analysis:
            shape = (self._lselect.shape[0], self._rselect.shape[0])
            self._cl_interaction_hist = cl_array.zeros(self.queue, shape,
                    dtype=np.int32)
            # Keys here are zero-based offsets, unlike _cl_occ_grid above.
            self._cl_interaction_matrix = {}
            for i in xrange(self._nrestraints + 1 - self.interaction_restraints_cutoff):
                self._cl_interaction_matrix[i] = cl_array.zeros(self.queue, shape,
                        dtype=np.float32)
            # Coordinate arrays
            self._cl_rselect = np.zeros((self._rselect.shape[0], 4), dtype=np.float32)
            self._cl_rselect[:, :3] = self._rselect
            self._cl_rselect = cl_array.to_device(q, self._cl_rselect)
            self._cl_lselect = np.zeros((self._lselect.shape[0], 4), dtype=np.float32)
            self._cl_lselect[:, :3] = self._lselect
            self._cl_lselect = cl_array.to_device(q, self._cl_lselect)
            self._cl_rot_lselect = cl_array.zeros_like(self._cl_lselect)

            # Update kernel constants
            self._kernel_constants['nreceptor_coor'] = self._cl_rselect.shape[0]
            self._kernel_constants['nligand_coor'] = self._cl_lselect.shape[0]

        # Built only after the constants above are final.
        self._cl_kernels = Kernels(q.context, self._kernel_constants)
        self._cl_rfftn = pyclfft.RFFTn(q.context, self._shape)
        self._cl_irfftn = pyclfft.iRFFTn(q.context, self._shape)

        # Initial calculations
        self._cl_rfftn(q, self._cl_rcore, self._cl_ft_rcore)
        self._cl_rfftn(q, self._cl_rsurf, self._cl_ft_rsurf)
        # NOTE(review): _cl_interspace was zero-filled above and nothing
        # visible in this method writes to it before this sum -- confirm the
        # intended input.
        self._cl_tot_complex = cl_array.sum(self._cl_interspace, dtype=np.dtype(np.float32))
예제 #41
0
 def _sum_OCL(a, dtype=None, queue=None, slice=None):
     """Sum *a* with ``cl_array.sum`` and return the result fetched to the host.

     ``dtype``, ``queue`` and ``slice`` are forwarded unchanged to
     ``cl_array.sum``; the same ``queue`` is used for the final ``get``.
     """
     device_total = cl_array.sum(a=a, dtype=dtype, queue=queue, slice=slice)
     return device_total.get(queue=queue)
예제 #42
0
    def find_contacts(self, predict=True):
        """Run the plane-contact and cell-cell contact kernels.

        Expects cell_centers, cell_dirs, cell_lens, cell_rads, cell_sqs,
        cell_dcenters, cell_dlens, cell_dangs, sorted_ids and sq_inds to be
        current on the device.

        Fills cell_n_cts, ct_frs, ct_tos, ct_dists, ct_pts, ct_norms and
        ct_reldists, and stores the total number of contacts in
        ``self.n_cts``.

        When ``predict`` is true the predicted centers, directions and
        lengths are used instead of the current ones.
        """
        if predict:
            centers, dirs, lens = (self.pred_cell_centers_dev,
                                   self.pred_cell_dirs_dev,
                                   self.pred_cell_lens_dev)
        else:
            centers, dirs, lens = (self.cell_centers_dev,
                                   self.cell_dirs_dev,
                                   self.cell_lens_dev)

        launch_grid = (self.n_cells,)

        # Buffers that both kernels take in the same relative order.
        cell_geometry = (centers.data,
                         dirs.data,
                         lens.data,
                         self.cell_rads_dev.data)
        contact_buffers = (self.cell_n_cts_dev.data,
                           self.ct_frs_dev.data,
                           self.ct_tos_dev.data,
                           self.ct_dists_dev.data,
                           self.ct_pts_dev.data,
                           self.ct_norms_dev.data,
                           self.ct_reldists_dev.data,
                           self.ct_stiff_dev.data)

        # Contacts between cells and planes.
        plane_args = (numpy.int32(self.max_cells),
                      numpy.int32(self.max_contacts),
                      numpy.int32(self.n_planes),
                      self.plane_pts_dev.data,
                      self.plane_norms_dev.data,
                      self.plane_coeffs_dev.data) + cell_geometry + contact_buffers
        plane_event = self.program.find_plane_contacts(
            self.queue, launch_grid, None, *plane_args)
        plane_event.wait()

        # Contacts between cells, using the grid-square lookup buffers.
        grid_lookup = (self.cell_sqs_dev.data,
                       self.sorted_ids_dev.data,
                       self.sq_inds_dev.data)
        cell_args = (numpy.int32(self.max_cells),
                     numpy.int32(self.n_cells),
                     numpy.int32(self.grid_x_min),
                     numpy.int32(self.grid_x_max),
                     numpy.int32(self.grid_y_min),
                     numpy.int32(self.grid_y_max),
                     numpy.int32(self.n_sqs),
                     numpy.int32(self.max_contacts)) \
            + cell_geometry + grid_lookup + contact_buffers
        cell_event = self.program.find_contacts(
            self.queue, launch_grid, None, *cell_args)
        cell_event.wait()

        # Total contact count, reduced on the device and fetched to the host.
        self.n_cts = cl_array.sum(self.cell_n_cts_dev).get()
예제 #43
0
def test_spectral_poisson(ctx_factory, grid_shape, proc_shape, h, dtype,
                          timing=False):
    """Check that SpectralPoissonSolver output satisfies its own equation.

    For each ``m_squared`` in ``(0, 1.2, 19.2)`` the solver is run on a
    zero-mean random source ``rho``; the Laplacian of the solution is then
    taken with the matching derivative operator and the residual
    ``lap - rho - m_squared * fx`` is compared against dtype-dependent
    relative tolerances (maximum and average).

    Parameters
    ----------
    ctx_factory
        Callable returning an OpenCL context; when falsy, a context is
        chosen with ``ps.choose_device_and_make_context()``.
    grid_shape, proc_shape
        Global grid shape and process decomposition.
    h : int
        Halo width.  ``0`` selects spectral collocation; otherwise finite
        differences with the second-centered-difference eigenvalues.
    dtype
        Field dtype; ``np.float64`` uses the tighter tolerances.
    timing : bool
        When true, time ten solver calls and print the result on rank 0.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (3, 5, 7)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    dk = tuple(2 * np.pi / Li for Li in L)

    if h == 0:
        def get_evals_2(k, dx):
            return - k**2

        derivs = ps.SpectralCollocator(fft, dk)
    else:
        from pystella.derivs import SecondCenteredDifference
        get_evals_2 = SecondCenteredDifference(h).get_eigenvalues
        derivs = ps.FiniteDifferencer(mpi, h, dx, stream=False)

    solver = ps.SpectralPoissonSolver(fft, dk, dx, get_evals_2)

    # The solution array carries a halo of width h on every side.
    pencil_shape = tuple(ni + 2*h for ni in rank_shape)

    # np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
    statistics = ps.FieldStatistics(mpi, 0, rank_shape=rank_shape,
                                    grid_size=np.prod(grid_shape))

    fx = cla.empty(queue, pencil_shape, dtype)
    rho = clr.rand(queue, rank_shape, dtype)
    # Remove the mean from the source before solving.
    rho -= statistics(rho)["mean"]
    lap = cla.empty(queue, rank_shape, dtype)
    rho_h = rho.get()

    for m_squared in (0, 1.2, 19.2):
        solver(queue, fx, rho, m_squared=m_squared)
        fx_h = fx.get()
        if h > 0:
            # Strip the halo so fx_h matches the shape of lap and rho.
            fx_h = fx_h[h:-h, h:-h, h:-h]

        derivs(queue, fx=fx, lap=lap)

        diff = np.fabs(lap.get() - rho_h - m_squared * fx_h)
        max_err = np.max(diff) / cla.max(clm.fabs(rho))
        avg_err = np.sum(diff) / cla.sum(clm.fabs(rho))

        max_rtol = 1e-12 if dtype == np.float64 else 1e-4
        avg_rtol = 1e-13 if dtype == np.float64 else 1e-5

        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"solution inaccurate for {h=}, {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        # The lambda reuses the last m_squared value from the loop above.
        time = timer(lambda: solver(queue, fx, rho, m_squared=m_squared), ntime=10)

        if mpi.rank == 0:
            print(f"poisson took {time:.3f} ms for {grid_shape=}, {proc_shape=}")
def pyopencl_mean(x_gpu_in):
    """Return the mean of *x_gpu_in*: its device-side sum over its size."""
    total = cl_array.sum(x_gpu_in)
    element_count = float(x_gpu_in.size)
    return total / element_count
예제 #45
0
def spec(filename, extra):
        """Estimate a multifractal spectrum of an image using OpenCL kernels.

        Stage 1 builds an ``(Nx, Ny)`` "alpha image".  For each image row the
        ``measure`` kernel evaluates the selected local measure over ``l = 4``
        nested (border-clamped) windows around every pixel of that row; a
        sparse least-squares fit of ``log(measure)`` against the log window
        sizes then yields one slope per pixel.

        Stage 2 splits the range of alpha values into classes.  For each class
        and each box size the ``krnl`` kernel flags the blocks containing at
        least one pixel of that class; ``np.polyfit`` of the log block counts
        against the log box sizes gives the (negated) slope stored as the
        class's box dimension.

        Parameters:
            filename: path passed to ``Image.open`` when ``extra[1]`` equals
                ``True``; otherwise the already loaded image object itself.
            extra: indexable options --
                ``extra[0]``: ``cuantas``, the number of alpha classes;
                ``extra[1]``: open ``filename`` with ``Image.open``;
                ``extra[2]``: convert to gray levels with ``convert('L')``;
                ``extra[3]``: flatten with ``np.array(a).reshape(...)``
                (only consulted when ``extra[2]`` is not ``True``);
                ``extra[4]``: measure selector -- 0 max, 1 min, 2 sum,
                3 count of window pixels equal to the center pixel.

        Returns:
            ``np.hstack((clases, falpha))``: the class lower bounds followed
            by one box-dimension estimate per class (both ``float32``).

        Uses the module-level names ``Image``, ``ctx``, ``queue``, ``cl``,
        ``cla``, ``mf``, ``coo_matrix`` and ``linsolve``.
        """
        cuantas = extra[0]
        OPEN_IMAGE = extra[1]
        # The flags are compared with ``== True`` (not truthiness) on purpose,
        # so non-bool flag values keep selecting the same branches as before.
        if OPEN_IMAGE == True:
            a = Image.open(filename)
        else:
            a = filename
        # NOTE(review): ``a.size`` must unpack into two values.  That holds for
        # PIL images; ``numpy.ndarray.size`` is a scalar, so a plain ndarray
        # passed as ``filename`` would fail here -- confirm what callers pass.
        Nx, Ny = a.size

        RESHAPE = extra[3]
        CONVERT = extra[2]
        if CONVERT == True:
            gray = a.convert('L')  # rgb 2 gray
            arr = np.array(gray.getdata()).astype(np.int32)
        elif RESHAPE == True:
            arr = np.array(a).reshape(a.shape[0]*a.shape[1])
        else:
            # NOTE(review): the kernels read ``arr`` as 32-bit ints; presumably
            # callers already supply int32 data on this path -- verify.
            arr = a

        alphaIm = np.zeros((Nx, Ny), dtype=np.double)  # Nx rows x Ny columns

        l = 4  # (maximum window size-1) / 2
        # Abscissae of the per-pixel fit: log of 1, 3, 5, 7.
        temp = np.log([2*i + 1 for i in range(l)])
        measure = np.zeros(l*Ny).astype(np.int32)

        # One (l x 2) design matrix [log size, 1] per column, laid out block
        # diagonally so a single sparse solve fits every pixel of a row.
        b = np.vstack((temp, np.ones((1, l)))).T
        AA = coo_matrix(np.kron(np.identity(Ny), b))

        # which: which measure to take
        which = extra[4]

        # The ``iso`` measure receives the pixel's column ``jim`` as its
        # reference column; the raw work-item id ``j`` also encodes the window
        # index and can point past the end of the image on the last rows.
        measure_prg = cl.Program(ctx, """
        int maxx(__global int *img, int x1, int y1, int x2, int y2, const int Ny) {
            int i, j;
            int maxim = 0;
            for(i = x1; i < x2; i++)
                for(j = y1; j < y2; j++)
                    if(img[i*Ny + j] > maxim) maxim = img[i*Ny + j];

            return maxim;
        }
        int minn(__global int *img, int x1, int y1, int x2, int y2, const int Ny) {
            int i, j;
            int minim = 255;
            for(i = x1; i < x2; i++)
                for(j = y1; j < y2; j++)
                    if(img[i*Ny + j] < minim) minim = img[i*Ny + j];

            return minim;
        }
        int summ(__global int *img, int x1, int y1, int x2, int y2, const int Ny) {
            int i, j;
            int summ = 0;
            for(i = x1; i < x2; i++)
                for(j = y1; j < y2; j++)
                    summ += img[i*Ny + j];

            return summ;
        }
        int iso(__global int *img, int x1, int y1, int x2, int y2, const int Ny, const int x, const int y) {
            int i, j;
            int cant = 0;
            for(i = x1; i < x2; i++)
                for(j = y1; j < y2; j++)
                    if(img[i*Ny + j] == img[x*Ny + y]) cant++;

            return cant;
        }
        __kernel void measure(__global int *dest, __global int *img, const int Nx,
                                const int Ny, const int l, int i, const int d, const int which) {
             int j = get_global_id(0);
             int jim = (int)(j/l)+d;
             if(which == 0)
                 dest[j] = maxx(img,max(i-((j%l)+1),0),max(jim-((j%l)+1),0),
                                    min(i+(j%l)+1,Nx-1),min(jim+(j%l)+1,Ny-1), Ny) + 1;
             if(which == 1)
                 dest[j] = minn(img,max(i-((j%l)+1),0),max(jim-((j%l)+1),0),
                                    min(i+(j%l)+1,Nx-1),min(jim+(j%l)+1,Ny-1), Ny) + 1;
             if(which == 2)
                 dest[j] = summ(img,max(i-((j%l)+1),0),max(jim-((j%l)+1),0),
                                    min(i+(j%l)+1,Nx-1),min(jim+(j%l)+1,Ny-1), Ny) + 1;
             if(which == 3)
                 dest[j] = iso(img,max(i-((j%l)+1),0),max(jim-((j%l)+1),0),
                                    min(i+(j%l)+1,Nx-1),min(jim+(j%l)+1,Ny-1), Ny, i, jim) + 1;
            
        }
        """).build()

        img_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=arr)
        dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, measure.nbytes)

        # Loop-invariant kernel arguments.
        nx32 = np.int32(Nx)
        ny32 = np.int32(Ny)
        l32 = np.int32(l)
        which32 = np.int32(which)
        no_offset = np.int32(0)

        for i in range(Nx):
            # A single launch with column offset 0 and global size l*Ny fills
            # the whole ``measure`` vector (work item j handles column j / l
            # with window index j % l), so one launch and one transfer per row
            # are sufficient.
            measure_prg.measure(queue, measure.shape, None, dest_buf, img_buf,
                                nx32, ny32, l32, np.int32(i), no_offset, which32)
            # enqueue_copy blocks by default when host memory is involved.
            cl.enqueue_copy(queue, measure, dest_buf)

            # Instead of doing polyfits, a sparse linear system is constructed and solved
            bb = np.log(measure)
            z = linsolve.lsqr(AA, bb)[0]
            # Unknowns are interleaved (slope, intercept) per column; the
            # Fortran-order reshape puts all slopes in row 0.
            z = z.reshape(2, Ny, order='F')
            alphaIm[i] = z[0]

        maxim = np.max(alphaIm)
        minim = np.min(alphaIm)

        paso = (maxim-minim)/cuantas
        if paso <= 0:
            # the alpha image is monofractal: every class starts at minim
            clases = (np.zeros(cuantas) + minim).astype(np.float32)
        else:
            clases = np.arange(minim, maxim, paso).astype(np.float32)

        # Largest box-size index used in the box counting below.
        cant = int(np.floor(np.log(Nx)))

        # concatenate the image A as [[A,A],[A,A]]
        hs = np.hstack((alphaIm, alphaIm))
        alphaIm = np.vstack((hs, hs))

        # OpenCL C has no ``and`` operator, so the class-membership test in
        # the kernel is written with ``&&``.
        box_prg = cl.Program(ctx, """
            __kernel void krnl(__global int *flag, __global float *clases, 
                               __global float* alphaIm,const int sizeBlocks, const int Ny,
                                const int numBlocks_y, const int c, const int cuantas) {
                int i = get_global_id(0);
                int j = get_global_id(1);
                int xi = i*sizeBlocks;
                int xf = (i+1)*sizeBlocks-1;
                int yi = j*sizeBlocks;
                int yf = (j+1)*sizeBlocks-1;
                if(xf == xi) xf = xf+1;
                if(yf == yi) yf = yf+1;

                int f = 0;
                int s1 = xf-xi;
                int s2 = yf-yi;
                
                if(c != cuantas-1) {
                    // f = 1 if any pixel in block is between clases[c] and clases[c+1]
                    int w, t;
                    for(w = xi; w < xf; w++) {
                        for(t = yi; t < yf; t++) {
                            float b = alphaIm[w*Ny*2 + t];
                            if (b >= clases[c] && b < clases[c+1]) {
                               f = 1;
                               break;
                            }
                        }
                        if(f == 1) break;
                    }
                }
                else {
                    // f = 1 if any pixel in block is equal to classes[c]
                    int w, t;
                    for(w = xi; w < xf; w++) {
                        for(t = yi; t < yf; t++) {
                            float b = alphaIm[w*Ny*2 + t];
                            if (b == clases[c]) { // !!
                               f = 1;
                               break;
                            }
                        }
                        if(f == 1)
                            break;
                    }
                }
                flag[i*numBlocks_y + j] = f;
            }
        """).build()

        # Multifractal dimensions
        falpha = np.zeros(cuantas).astype(np.float32)
        clases_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=clases.astype(np.float32))
        alphaIm_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=alphaIm.astype(np.float32))

        # Abscissae of the box-counting fit are the same for every class.
        log_sizes = [np.log(2*k + 1) for k in range(cant+1)]

        for c in range(cuantas):
            N = np.zeros(cant+1)
            # window sizes
            for k in range(cant+1):
                sizeBlocks = 2*k+1
                # NOTE(review): with integer Nx/Ny, ``/`` floors on Python 2
                # (making np.ceil a no-op) but rounds up on Python 3 -- confirm
                # which block count is intended.
                numBlocks_x = int(np.ceil(Nx/sizeBlocks))
                numBlocks_y = int(np.ceil(Ny/sizeBlocks))

                flag = np.zeros((numBlocks_x, numBlocks_y)).astype(np.int32)
                flag_buf = cl.Buffer(ctx, mf.WRITE_ONLY, flag.nbytes)

                box_prg.krnl(queue, flag.shape, None, flag_buf, clases_buf, alphaIm_buf,
                             np.int32(sizeBlocks), np.int32(Ny), np.int32(numBlocks_y),
                             np.int32(c), np.int32(cuantas))
                cl.enqueue_copy(queue, flag, flag_buf)
                N[k] = cla.sum(cla.to_device(queue, flag)).get()

            # Hausdorff (box) dimension of the alpha distribution; the +1
            # keeps the logarithm finite when no block is flagged.
            falpha[c] = -np.polyfit(log_sizes, np.log(N + 1), 1)[0]
        s = np.hstack((clases, falpha))
        return s
예제 #46
0
    def _gpu_search(self):
        """Run the exhaustive rotational search on the GPU and store results.

        For every rotation index below ``gpu_data['nrot']`` the kernels held
        in ``gpu_data['k']`` are enqueued in a fixed order: rotation of the
        scanning surface, FFT-based clash and interaction volumes, the
        ``touch`` test, optional distance-restraint filtering, histogramming
        and violation counting.  After the queue has finished, the reduced
        results are written to ``self.data`` under ``'accessible_complexes'``,
        ``'accessible_interaction_space'`` and ``'violations'``.
        """

        # local aliases for the host data, device data, queue and kernels
        host = self.data
        dev = self.gpu_data
        queue = self.queue
        kernels = dev['k']

        # accumulator for the (weighted) number of sampled complexes, seeded
        # with the current sum over the interaction space
        total_complexes = cl_array.sum(dev['interspace'], dtype=np.float32)

        # reference time for the progress display
        start_time = _time()

        for rot_idx in xrange(dev['nrot']):

            # bring the scanning chain into the current orientation
            kernels.rotate_image3d(
                queue, dev['sampler'], dev['im_lsurf'],
                self.rotations[rot_idx], dev['lsurf'], host['im_center'])

            # clash volume: correlate the rotated surface with the core
            kernels.rfftn(queue, dev['lsurf'], dev['ft_lsurf'])
            kernels.c_conj_multiply(
                queue, dev['ft_lsurf'], dev['ft_rcore'], dev['ft_clashvol'])
            kernels.irfftn(queue, dev['ft_clashvol'], dev['clashvol'])

            # interaction volume: correlate the rotated surface with the
            # receptor surface
            kernels.c_conj_multiply(
                queue, dev['ft_lsurf'], dev['ft_rsurf'], dev['ft_intervol'])
            kernels.irfftn(queue, dev['ft_intervol'], dev['intervol'])

            # mark every position whose conformation is a proper complex
            kernels.touch(
                queue, dev['clashvol'], dev['max_clash'],
                dev['intervol'], dev['min_interaction'],
                dev['interspace'])

            if self.distance_restraints:
                kernels.fill(queue, dev['restspace'], 0)

                # space consistent with a number of distance restraints
                kernels.distance_restraint(
                    queue, dev['restraints'],
                    self.rotations[rot_idx], dev['restspace'])

                # accessible interaction space that is also consistent with
                # a certain number of distance restraints
                kernels.multiply(
                    queue, dev['restspace'], dev['interspace'],
                    dev['access_interspace'])

            # add this rotation's complexes, weighted to correct for
            # orientational/rotational bias
            total_complexes += (
                cl_array.sum(dev['interspace'], dtype=np.float32)
                * np.float32(self.weights[rot_idx]))

            # keep, per position, the maximum number of consistent
            # restraints for later visualization
            cl_array.maximum(
                dev['best_access_interspace'], dev['access_interspace'],
                dev['best_access_interspace'])

            # histogram of accessible complexes consistent with EXACTLY N
            # distance restraints
            kernels.histogram(
                queue, dev['access_interspace'], dev['subhists'],
                self.weights[rot_idx], host['nrestraints'])

            # per-restraint violation counts for the complexes consistent
            # with EXACTLY N restraints
            kernels.count_violations(
                queue, dev['restraints'], self.rotations[rot_idx],
                dev['access_interspace'], dev['viol_counter'],
                self.weights[rot_idx])

            # progress output only when attached to a terminal
            if _stdout.isatty():
                self._print_progress(rot_idx, dev['nrot'], start_time)

        # block until all enqueued work is done
        self.queue.finish()

        # copy the sub-histograms to the host and reduce them to the final
        # histogram of accessible complexes
        accessible = dev['subhists'].get().sum(axis=0)
        # bin 0 is what remains of the total after removing the complexes
        # consistent with EXACTLY N >= 1 restraints
        accessible[0] = total_complexes.get() - sum(accessible[1:])
        host['accessible_complexes'] = accessible
        host['accessible_interaction_space'] = dev['best_access_interspace'].get()

        # reduce the violation sub-matrices to the final violation matrix
        host['violations'] = dev['viol_counter'].get().sum(axis=0)
예제 #47
0
 def sum(self, a, dtype=None):
     """Sum ``a`` with ``pyopencl.array.sum`` and return the host-side value.

     The reduction is enqueued on ``self._array_context.queue`` with the
     given ``dtype``.  The result is fetched with ``.get()`` and indexed
     with ``[()]`` (presumably unwrapping a zero-dimensional array into a
     scalar).
     """
     import pyopencl.array as cl_array

     queue = self._array_context.queue
     device_total = cl_array.sum(a, dtype=dtype, queue=queue)
     host_total = device_total.get()
     return host_total[()]
예제 #48
0
    def ggr_iteration(self,iteration):
        """Advance the domain pattern by one step of the magnetization plan.

        The target for this step is ``self.plan[iteration+1]`` (stored in
        ``self.goal_m``).  The method biases and rescales ``self.domains``,
        restricts changes to wall sites, uses ``fminbound`` on
        ``self._ggr_spa_error`` over ``[-1, 1]`` to pick
        ``self.optimized_spa``, promotes spins with it, prints one status
        line, records ``(iteration, optimized_spa, m_out)`` in
        ``self.ggr_tracker[iteration]`` and updates ``self.checkpoint`` /
        crossing bookkeeping.

        Requires ``self.can_has_domains``, ``self.can_has_envelope`` and
        ``self.can_has_ggr`` to be truthy (checked with ``assert``).
        """
        
        assert self.can_has_domains, "no domains set!"
        assert self.can_has_envelope, "no goal envelope set!"
        assert self.can_has_ggr, "must set ggr before running ggr_iteration"

        # get the target net magnetization: net_m is the raw sum over
        # self.domains, so the per-site goal is scaled by self.N2 before the
        # difference is taken; self.target keeps only the sign of that gap
        net_m       = cla.sum(self.domains).get()
        self.goal_m = self.plan[iteration+1]
        needed_m    = self.goal_m*self.N2-net_m
        self.target = np.sign(needed_m).astype(np.float32)
        
        # copy the current domain pattern to self.incoming
        self.copy(self.domains,self.incoming)
        
        # find the domain walls. these get used in self.make_available. make the correct sites available for modification
        self.findwalls.execute(self.queue,(self.N,self.N),self.domains.data,self.allwalls.data,self.poswalls.data,self.negwalls.data,np.int32(self.N))
        self.make_available1(self.available,self.allwalls,self.negpins,self.pospins)
        #if net_m > self.goal_m: self.make_available1(self.available,self.poswalls,self.negpins,self.pospins)
        #if net_m < self.goal_m: self.make_available1(self.available,self.negwalls,self.negpins,self.pospins)
        
        # run the ising bias
        self.ising(self.domains,self.alpha)
        
        # rescale the domains. this operates on the class variables so no arguments are passed. self.domains stores the rescaled real-valued domains.
        # the rescaled domains are bounded to the range +1 -1.
        # change in the domain pattern is allowed to happen only in the walls.
        # enforce the recency condition to prevent domain splittings (basically, make it hard to revert changes from long ago)
        self._rescale_speckle()
        self.bound(self.domains,self.domains)
        self.only_in_walls(self.domains,self.incoming,self.available)
        # unlike the other kernel launches in this method, this one is
        # followed by .wait() on the returned object
        self.recency.execute(self.queue,(self.N2,),
                             self.whenflipped.data,self.domains.data,self.incoming.data,
                             self.recency_need.data,self.target,np.int32(iteration)).wait()

        # since the domains have been updated, refind the walls
        self.findwalls.execute(self.queue,(self.N,self.N),self.domains.data,self.allwalls.data,self.poswalls.data,self.negwalls.data,np.int32(self.N))

        # now adjust the magnetization so that it reaches the target
        net_m       = cla.sum(self.domains).get()
        needed_m    = self.goal_m*self.N2-net_m
        self.target = np.sign(needed_m).astype(np.float32)
        # NOTE(review): the wall set is chosen from the sign of net_m itself,
        # whereas the commented-out variant above compared net_m with
        # self.goal_m -- confirm that comparing against 0 is intended.
        # When net_m == 0 neither call runs and self.available is left as is.
        if net_m > 0: self.make_available2(self.available,self.poswalls,self.domains,self.target)
        if net_m < 0: self.make_available2(self.available,self.negwalls,self.domains,self.target)
        
        # now we need to run an optimizer to find the correct value for spa. this should result in an
        # update to the class variable self.optimized_spa. optimized_spa is a class variable because spa
        # changes slowly so using the old value as the starting point in the optimization gives a speed up.
        # (only element 0 of the full_output result, the minimizer, is used)
        opt_out = fminbound(self._ggr_spa_error,-1,1,full_output=1)
        self.optimized_spa = opt_out[0]
        
        # use the optimized spa value to actually promote the spins in self.domains
        self.ggr_promote_spins(self.domains,self.available,self.domains,self.target*self.optimized_spa)
        self.bound(self.domains,self.domains)
        # achieved per-site magnetization after promotion and bounding
        m_out   = (cla.sum(self.domains).get())/self.N2
        
        # update the whenflipped data, which records the iteration when each pixel changed sign
        self.update_whenflipped.execute(self.queue,(self.N2,),self.whenflipped.data,self.domains.data,self.incoming.data,np.int32(iteration))
        
        # status line: iteration, requested magnetization, achieved magnetization, spa
        print "%.4d, %.3f, %.3f, %.3f"%(iteration, self.goal_m, m_out, self.optimized_spa)
        self.ggr_tracker[iteration] = iteration, self.optimized_spa, m_out
        
        # set a flag to save output. it is better to check m_out than to trust that the
        # self.goal_m, asked for in self.plan, has actually been achieved
        if m_out < self.next_crossing:
            print "***"
            self.checkpoint    = True
            self.crossed       = self.next_crossing
            try:
                # drop the crossing just passed and arm the next one; indexing
                # [-1] raises IndexError once no crossings remain
                self.crossings     = self.crossings[:-1]
                self.next_crossing = self.crossings[-1]
            except IndexError:
                # no crossings left: fall back to a fixed threshold
                self.next_crossing = 0.01
        else: self.checkpoint = False