Python ocl_api Exemples, reikna.cluda.ocl_api Python Exemples

Exemple #1

0

Afficher le fichier

    def initialize_gpu(self):
        """Try to set up the reikna OpenCL thread, FFT plan and device buffers.

        On success ``self.gpu_flag`` is 1 and ``self.gpu_api``, ``self.thr``,
        ``self.data_dev``, ``self.myfft``, ``self.W``, ``self.W_dev``,
        ``self.W2_dev`` and ``self.tmp_dev`` are populated.  On any failure
        ``self.gpu_flag`` is set to 0 and a message is printed, so the caller
        can fall back to the CPU path.

        Reads ``self.st['Kd']`` (grid shape) and ``self.st['w']``.
        """
        try:
            import reikna.cluda as cluda
            from reikna.fft import FFT

            # NOTE(review): ``dtype`` is not defined in this method; it is
            # presumably a module-level complex dtype -- confirm.
            data = numpy.zeros(self.st['Kd'], dtype=dtype)
            print('get_platform')
            api = cluda.ocl_api()
            print('api=', api == cluda.ocl_api())
            # Compare backend identifiers instead of calling cluda.cuda_api():
            # that call imports PyCUDA and would abort the whole GPU setup on
            # machines that only have OpenCL installed.
            api_id = api.get_id()
            if api_id == cluda.cuda_id():
                self.gpu_api = 'cuda'
            elif api_id == cluda.ocl_id():
                self.gpu_api = 'opencl'

            # ``async`` is a reserved word since Python 3.7; newer reikna
            # releases spell the keyword ``async_``.  Fall back to the old
            # spelling for older releases without writing it literally.
            try:
                self.thr = api.Thread.create(async_=True)
            except TypeError:
                self.thr = api.Thread.create(**{'async': True})
            self.data_dev = self.thr.to_device(data)
            # A concrete tuple of axes: a Python 3 ``range`` object is not a
            # plain sequence of ints.
            axes = tuple(range(numpy.size(self.st['Kd'])))
            myfft = FFT(data, axes=axes)
            self.myfft = myfft.compile(self.thr, fast_math=True)

            self.gpu_flag = 1
            print('create gpu fft?', self.gpu_flag)
            print('line 642')

            W = self.st['w'][..., 0]
            print('line 645')

            self.W = numpy.reshape(W, self.st['Kd'], order='C')

            print('line 647')
            print('line 649')
            # Three independent device copies of the weights.
            self.W_dev = self.thr.to_device(self.W.astype(dtype))
            self.W2_dev = self.thr.to_device(self.W.astype(dtype))
            self.tmp_dev = self.thr.to_device(self.W.astype(dtype))
            self.gpu_flag = 1
            print('line 652')
        except Exception:
            # Best-effort initialisation: any failure means "use the CPU".
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            self.gpu_flag = 0
            print('get error, using cpu')

Exemple #2

0

Afficher le fichier

def kspacegaussian_filter_CL(ksp, sigma):
    """Weight 3-D k-space data with a Gaussian on an OpenCL device.

    Each element at C-order index ``(i, j, k)`` is multiplied by
    ``exp(-2*pi**2 * sum_d (u_d * sigma[d])**2)`` where
    ``u_d = (index_d - floor(dim_d / 2)) / dim_d``.

    :param ksp: 3-D array of k-space samples; it is converted to
        ``complex64`` before being uploaded.
    :param sigma: sequence of three per-axis standard deviations.
    :returns: filtered data as a ``complex64`` numpy array of ``ksp.shape``.
    :raises ValueError: if ``ksp`` is not 3-D or ``sigma`` does not have
        three entries.
    """
    sz = np.array(np.shape(ksp))
    if sz.size != 3:
        raise ValueError("ksp must be a 3-D array, got shape %r" % (tuple(sz),))
    if len(sigma) != 3:
        raise ValueError("sigma must have exactly three entries")

    from reikna import cluda
    from reikna.cluda import dtypes

    dtype = np.complex64
    ftype = np.float32
    api = cluda.ocl_api()
    thr = api.Thread.create()
    # The sizes and sigmas are baked into the source with %-formatting, so
    # every literal percent sign in the kernel is doubled.  "%.9e" keeps
    # small sigmas that "%f" would round to zero.
    program = thr.compile("""
KERNEL void gauss_kernel(
    GLOBAL_MEM ${ctype} *dest,
    GLOBAL_MEM ${ctype} *src)
{
  const ${ultype} x = (${ultype}) get_global_id(0);
  const SIZE_T dim1= %d;
  const SIZE_T dim2= %d;
  const SIZE_T dim3= %d;
  ${ftype} sigma[3];
  sigma[0]=%.9e;sigma[1]=%.9e;sigma[2]=%.9e;
  const double TWOPISQ = 19.739208802178716;
  // Split the flat C-order index with exact integer arithmetic; a float
  // decomposition loses precision for large indices and mishandles the
  // j == dim2 boundary.
  ${ftype} i = (${ftype})((x / dim3) / dim2);
      i = (i - (${ftype})floor((${ftype})(dim1)/2.0))/(${ftype})(dim1);
  ${ftype} j = (${ftype})((x / dim3) %% dim2);
      j = (j - (${ftype})floor((${ftype})(dim2)/2.0f))/(${ftype})(dim2);
  ${ftype} k = (${ftype})(x %% dim3);
      k = (k - (${ftype})floor((${ftype})(dim3)/2.0f))/(${ftype})(dim3);

  ${ftype} weight = exp(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  dest[x].x = src[x].x * weight;
  dest[x].y = src[x].y * weight;
}
""" % (sz[0], sz[1], sz[2], sigma[0], sigma[1], sigma[2]),
        render_kwds=dict(ctype=dtypes.ctype(dtype),
                         ftype=dtypes.ctype(ftype),
                         ultype=dtypes.ctype(np.uint64)),
        fast_math=True)

    gauss_kernel = program.gauss_kernel
    # Upload the actual input as contiguous complex64, which is the layout
    # the kernel's ${ctype} pointers expect.
    data_dev = thr.to_device(np.ascontiguousarray(ksp, dtype=dtype))
    # The kernel runs in place: every work-item reads and writes only its
    # own element.
    gauss_kernel(data_dev, data_dev, global_size=int(sz[0] * sz[1] * sz[2]))
    return data_dev.get()

Exemple #3

0

Afficher le fichier

Fichier : reikna_test.py Projet : csfoo/TF_binding

def main2():
    """Run an element-wise multiply kernel many times and verify the result.

    Compiles a small CLUDA kernel, launches it 100000 times on two random
    float32 vectors of length 256 and prints whether the device result
    equals ``a * b`` exactly.
    """
    N = 256

    api = cluda.ocl_api()
    thr = api.Thread.create()

    # Index with the global id so the kernel stays correct if the launch is
    # ever split into more than one work-group.
    program = thr.compile("""
    KERNEL void multiply_them(
        GLOBAL_MEM float *dest,
        GLOBAL_MEM float *a,
        GLOBAL_MEM float *b)
    {
      const SIZE_T i = get_global_id(0);
      dest[i] = a[i] * b[i];
    }
    """)

    multiply_them = program.multiply_them

    a = numpy.random.randn(N).astype(numpy.float32)
    b = numpy.random.randn(N).astype(numpy.float32)
    a_dev = thr.to_device(a)
    b_dev = thr.to_device(b)
    dest_dev = thr.empty_like(a_dev)

    # ``range`` works on both Python 2 and 3 (``xrange`` is Python 2 only).
    for _ in range(100000):
        multiply_them(dest_dev, a_dev, b_dev, local_size=N, global_size=N)

    print((dest_dev.get() - a * b == 0).all())

Exemple #4

0

Afficher le fichier

Fichier : fftmanager.py Projet : macronucleus/Chromagnon

 def init_reikna(self):
     """Pick the reikna backend and remember its first device.

     Does nothing unless the ``REIK`` flag is set.  ``CUDA`` selects the
     CUDA backend, otherwise OpenCL is used.  Stores the API module in
     ``self.api`` and the first device of the first platform in ``self.dev``.
     """
     if not REIK:
         return
     self.api = cluda.cuda_api() if CUDA else cluda.ocl_api()
     first_platform = self.api.get_platforms()[0]
     self.dev = first_platform.get_devices()[0]

Exemple #5

0

Afficher le fichier

Fichier : squeezing_feshbach.py Projet : fjarri/thesis

def test_uncertainties(trajectories=128):
    """Run one fixed-step Wigner integration and return its samples.

    Builds a ground state with ``ImaginaryTimeGroundState``, converts it with
    ``to_wigner_coherent``, applies a beam splitter once and integrates with
    ``Integrator.fixed_step`` while sampling ``N`` and ``I``.

    ``state_dtype``, ``grid``, ``system``, ``cutoff``, ``N``, ``stepper_cls``,
    ``f_detuning``, ``f_rabi``, ``interval``, ``steps`` and ``samples`` are not
    defined here; they come from the enclosing module.

    :param trajectories: number of trajectories passed to
        ``to_wigner_coherent`` and to the integrator.
    :returns: the ``(result, info)`` pair from ``integrator.fixed_step``.
    """

    api = cluda.ocl_api()
    thr = api.Thread.create()

    # Fixed seed; the two randint() draws below happen in this order.
    rng = numpy.random.RandomState(1234)

    gs_gen = ImaginaryTimeGroundState(thr, state_dtype, grid, system, cutoff=cutoff)

    # Ground state
    psi = gs_gen([N, 0], E_diff=1e-7, E_conv=1e-9, sample_time=1e-6)

    # Initial noise (first seed drawn from rng)
    psi = psi.to_wigner_coherent(trajectories, seed=rng.randint(0, 2**32-1))

    # Second seed drawn from rng goes to the integrator.
    integrator = Integrator(
        psi, system,
        trajectories=trajectories, stepper_cls=stepper_cls,
        wigner=True, seed=rng.randint(0, 2**32-1),
        cutoff=cutoff)

    # Prepare samplers
    bs = BeamSplitter(psi, f_detuning=f_detuning, f_rabi=f_rabi)
    n_sampler = PopulationSampler(psi)
    i_sampler = InteractionSampler(psi)
    samplers = dict(N=n_sampler, I=i_sampler)

    # Integrate (the beam splitter is applied to psi.data once, beforehand)
    bs(psi.data, 0, numpy.pi / 2)
    result, info = integrator.fixed_step(
        psi, 0, interval, steps, samples=samples,
        samplers=samplers, weak_convergence=['N', 'I'])

    return result, info

Exemple #6

0

Afficher le fichier

Fichier : generate_one_comp_gs.py Projet : fjarri/thesis

def get_gs(N):
    """Compute one-component ground-state profiles for ``N`` atoms.

    Builds a single-component system in a harmonic potential on a
    32x32x128 uniform grid, then computes both the ``tf_ground_state`` and
    the ``it_ground_state`` on an OpenCL thread.

    :returns: ``(xs, n_x, tf_n_x)`` as plain lists, where ``xs`` is
        ``grid.xs[2]`` and the two profiles are the squared magnitudes of
        component 0 summed over axes 1 and 2 and scaled by
        ``grid.dxs[0] * grid.dxs[1]``.
    """
    thr = cluda.ocl_api().Thread.create()

    component = const.rb87_1_minus1
    trap_freqs = (97.6, 97.6, 11.96)
    grid_shape = (32, 32, 128)
    state_dtype = numpy.complex128

    system = System(
        [component],
        const.scattering_matrix([component]),
        potential=HarmonicPotential(trap_freqs))
    grid = UniformGrid(grid_shape, box_for_tf(system, 0, N))

    def profile(state):
        # |psi|^2 of component 0, summed over axes 1 and 2.
        density = numpy.abs(state.data.get()) ** 2
        return density[:, 0].sum((1, 2)) * grid.dxs[0] * grid.dxs[1]

    tf_state = tf_ground_state(thr, grid, state_dtype, system, [N])
    it_state = it_ground_state(
        thr, grid, state_dtype, system, [N],
        E_diff=1e-7, E_conv=1e-9, sample_time=1e-5)

    n_x = profile(it_state)
    tf_n_x = profile(tf_state)

    return grid.xs[2].tolist(), n_x.tolist(), tf_n_x.tolist()

Exemple #7

0

Afficher le fichier

Fichier : fessler_nufft.py Projet : jyhmiinlin/cineFSE

    def __initialize_gpu(self):
        """Try to set up the reikna OpenCL thread, FFT plan and weight buffer.

        On success ``self.gpu_flag`` is 1 and ``self.thr``, ``self.data_dev``,
        ``self.myfft``, ``self.W`` and ``self.W_dev`` are populated.  On any
        failure ``self.gpu_flag`` is set to 0 and a message is printed so the
        caller can fall back to the CPU path.

        Reads ``self.st['Kd']`` (grid shape) and ``self.st['w']``.
        """
        try:
            import reikna.cluda as cluda
            from reikna.fft import FFT

            data = numpy.zeros(self.st['Kd'], dtype=numpy.complex64)
            api = cluda.ocl_api()
            # ``async`` is a reserved word since Python 3.7; newer reikna
            # releases spell the keyword ``async_``.  Fall back to the old
            # spelling for older releases without writing it literally.
            try:
                self.thr = api.Thread.create(async_=True)
            except TypeError:
                self.thr = api.Thread.create(**{'async': True})
            self.data_dev = self.thr.to_device(data)
            # A concrete tuple of axes: a Python 3 ``range`` object is not a
            # plain sequence of ints.
            axes = tuple(range(numpy.size(self.st['Kd'])))
            myfft = FFT(data, axes=axes)
            self.myfft = myfft.compile(self.thr, fast_math=True)

            self.gpu_flag = 1
            print('create gpu fft?', self.gpu_flag)
            print('line 642')
            W = self.st['w'][..., 0]
            print('line 645')
            self.W = numpy.reshape(W, self.st['Kd'], order='C')
            print('line 647')
            print('line 649')
            # NOTE(review): ``dtype`` is not defined in this method; it is
            # presumably a module-level complex dtype -- confirm.
            self.W_dev = self.thr.to_device(self.W.astype(dtype))
            self.gpu_flag = 1
            print('line 652')
        except Exception:
            # Best-effort initialisation: any failure means "use the CPU".
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            self.gpu_flag = 0
            print('get error, using cpu')

Exemple #8

0

Afficher le fichier

Fichier : imageprocessing.py Projet : ufo-kit/syris

def _fft_2(data, inverse=False, queue=None, block=True):
    """Run an in-place FFT over axes (0, 1) of *data* on an OpenCL queue.

    *data* is turned into a device array via ``g_util.get_array`` and cast to
    ``cfg.PRECISION.np_cplx`` when its dtype differs.  Compiled plans are
    cached in ``cfg.OPENCL.fft_plans`` per queue and per array shape.  When
    *block* is true the call waits for the thread to finish.  Returns the
    transformed device array.
    """
    if not queue:
        queue = cfg.OPENCL.queue
    thread = ocl_api().Thread(queue)

    data = g_util.get_array(data, queue=queue)
    complex_dtype = cfg.PRECISION.np_cplx
    if data.dtype != complex_dtype:
        data = data.astype(complex_dtype)

    # Two-level cache: queue -> shape -> compiled plan.
    plan_cache = cfg.OPENCL.fft_plans
    if queue not in plan_cache:
        plan_cache[queue] = {}
    plans_for_queue = plan_cache[queue]

    shape = data.shape
    if shape not in plans_for_queue:
        LOG.debug("Creating FFT Plan for {} and shape {}".format(queue, shape))
        plans_for_queue[shape] = FFT(data, axes=(0, 1)).compile(thread, fast_math=False)
    plan = plans_for_queue[shape]

    LOG.debug("fft_2, shape: %s, inverse: %s", shape, inverse)
    plan(data, data, inverse=inverse)
    if block:
        thread.synchronize()

    return data

Exemple #9

0

Afficher le fichier

Fichier : fftmanager.py Projet : iandobbie/Chromagnon

 def init_reikna(self):
     """Pick the reikna backend and remember its first device.

     Does nothing unless the ``REIK`` flag is set.  ``CUDA`` selects the
     CUDA backend, otherwise OpenCL is used.  Stores the API module in
     ``self.api`` and the first device of the first platform in ``self.dev``.
     """
     if not REIK:
         return
     self.api = cluda.cuda_api() if CUDA else cluda.ocl_api()
     first_platform = self.api.get_platforms()[0]
     self.dev = first_platform.get_devices()[0]

Exemple #10

0

Afficher le fichier

    def __initialize_gpu(self):
        """Try to set up the reikna OpenCL thread, FFT plan and weight buffer.

        On success ``self.gpu_flag`` is 1 and ``self.thr``, ``self.data_dev``,
        ``self.myfft``, ``self.W`` and ``self.W_dev`` are populated.  On any
        failure ``self.gpu_flag`` is set to 0 and a message is printed so the
        caller can fall back to the CPU path.

        Reads ``self.st['Kd']`` (grid shape) and ``self.st['w']``.
        """
        try:
            import reikna.cluda as cluda
            from reikna.fft import FFT

            data = numpy.zeros(self.st['Kd'], dtype=numpy.complex64)
            api = cluda.ocl_api()
            # ``async`` is a reserved word since Python 3.7; newer reikna
            # releases spell the keyword ``async_``.  Fall back to the old
            # spelling for older releases without writing it literally.
            try:
                self.thr = api.Thread.create(async_=True)
            except TypeError:
                self.thr = api.Thread.create(**{'async': True})
            self.data_dev = self.thr.to_device(data)
            # A concrete tuple of axes: a Python 3 ``range`` object is not a
            # plain sequence of ints.
            axes = tuple(range(numpy.size(self.st['Kd'])))
            myfft = FFT(data, axes=axes)
            self.myfft = myfft.compile(self.thr, fast_math=True)

            self.gpu_flag = 1
            print('create gpu fft?', self.gpu_flag)
            print('line 642')
            W = self.st['w'][..., 0]
            print('line 645')
            self.W = numpy.reshape(W, self.st['Kd'], order='C')
            print('line 647')
            print('line 649')
            # NOTE(review): ``dtype`` is not defined in this method; it is
            # presumably a module-level complex dtype -- confirm.
            self.W_dev = self.thr.to_device(self.W.astype(dtype))
            self.gpu_flag = 1
            print('line 652')
        except Exception:
            # Best-effort initialisation: any failure means "use the CPU".
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            self.gpu_flag = 0
            print('get error, using cpu')

Exemple #11

0

Afficher le fichier

def gpu_preindex_copy(image, Nd, Kd):
    """Benchmark the ``cTensorCopy`` kernel and return its output buffer.

    Uploads ``image`` as complex64, allocates a zero-filled complex64 device
    array of shape ``Kd``, runs ``cTensorCopy`` 100 times and prints the mean
    wall-clock time per launch.

    :param image: host array; converted with ``astype(numpy.complex64)``.
    :param Nd: sequence of source dimensions.
    :param Kd: sequence of destination dimensions (same length as ``Nd``).
    :returns: the destination device array.
    """
    import time

    import reikna.cluda as cluda
    from pynufft.src.re_subroutine import create_kernel_sets

    kernel_sets = create_kernel_sets('ocl')

    api = cluda.ocl_api()
    platforms = api.get_platforms()
    # Prefer the second platform when there is one, otherwise use the only
    # platform available instead of failing with IndexError.
    platform = platforms[1] if len(platforms) > 1 else platforms[0]
    device = platform.get_devices()[0]
    thr = api.Thread(device)
    prg = thr.compile(kernel_sets,
                      render_kwds=dict(LL=str(2)),
                      fast_math=False)
    g_image = thr.to_device(image.astype(numpy.complex64))
    g_image3 = thr.array(Kd, dtype=numpy.complex64).fill(0.0)
    prodNd = numpy.prod(Nd)
    prodKd = numpy.prod(Kd)
    dim = numpy.uint32(len(Nd))
    print(dim, prodNd, prodKd)

    # Row-major strides: for Nd = (2, 3, 4, 5) the products are
    # (60, 20, 5, 1).  Floor division keeps them exact integers on Python 3.
    res_Nd = ()
    res_Kd = ()
    accum_prodnd = 1
    accum_prodkd = 1
    for pp in range(len(Nd)):
        accum_prodnd *= Nd[pp]
        accum_prodkd *= Kd[pp]
        res_Nd += (prodNd // accum_prodnd,)
        res_Kd += (prodKd // accum_prodkd,)

    print(res_Nd, res_Kd)
    g_Nd = thr.to_device(numpy.array(res_Nd, dtype=numpy.uint32))
    g_Kd = thr.to_device(numpy.array(res_Kd, dtype=numpy.uint32))
    g_invNd = thr.to_device(1 / numpy.array(res_Nd, dtype=numpy.float32))

    print(g_Nd.get())
    print(g_Kd.get())

    t0 = time.time()
    for _ in range(100):
        prg.cTensorCopy(dim, g_Nd, g_Kd, g_invNd, g_image, g_image3,
                        local_size=None, global_size=int(prodNd))
    # Wait for the queued launches before stopping the clock; otherwise only
    # the enqueue overhead is measured.
    thr.synchronize()
    t1 = time.time()
    print('gpu_time = ', (t1 - t0)/100)
    return g_image3

Exemple #12

0

Afficher le fichier

Fichier : oclfft.py Projet : simone-codeluppi/gputools

def fft_plan(shape, dtype=np.complex64, axes=None, fast_math=True):
    """Return a compiled reikna FFT for arrays of ``shape`` and ``dtype``.

    ``axes`` is first passed through ``_convert_axes_to_absolute``.  The
    computation is compiled on a thread wrapping ``get_device().queue``.
    """
    absolute_axes = _convert_axes_to_absolute(shape, axes)

    # Shape/dtype stand-in; no device memory is needed to describe the plan.
    template = MockBuffer(dtype, shape)

    computation = FFT(template, axes=absolute_axes)
    thread = cluda.ocl_api().Thread(get_device().queue)
    return computation.compile(thread, fast_math=fast_math)

Exemple #13

0

Afficher le fichier

def gpu_preindex_multiply(image, vec, axis):
    """Run the ``cTensorMultiply`` kernel on ``image`` and return the device array.

    ``vec`` is written into the first ``Nd[0]`` entries and into the next
    ``Nd[1]`` entries of a float32 buffer of length ``sum(image.shape)``,
    which is passed to the kernel together with the shape, the row-major
    strides and their reciprocals.

    :param image: host array; uploaded as complex64.
    :param vec: 1-D vector; must be assignable to slices of length
        ``Nd[0]`` and ``Nd[1]``.
    :param axis: not used by this function.
    :returns: the device array holding ``image`` after the kernel has run.
    """
    import reikna.cluda as cluda
    from pynufft.src.re_subroutine import create_kernel_sets

    kernel_sets = create_kernel_sets('ocl')

    Nd = image.shape
    # Row-major strides: product of all trailing dimensions.
    Nd_elements = tuple(numpy.prod(Nd[dd+1:]) for dd in range(len(Nd)))
    invNd_elements = 1.0/numpy.asarray(Nd_elements)
    print(Nd_elements, invNd_elements)

    api = cluda.ocl_api()
    device = api.get_platforms()[0].get_devices()[0]
    thr = api.Thread(device)
    prg = thr.compile(kernel_sets,
                      render_kwds=dict(LL=str(2)),
                      fast_math=False)
    g_image = thr.to_device(image.astype(numpy.complex64))
    prodNd = numpy.prod(Nd)

    g_Nd = thr.to_device(numpy.asarray(Nd, dtype=numpy.uint32))
    g_Nd_elements = thr.to_device(numpy.asarray(Nd_elements, dtype=numpy.uint32))
    g_invNd_elements = thr.to_device(numpy.asarray(invNd_elements, dtype=numpy.float32))

    # Build the concatenated per-axis vector on the host and upload it once.
    vec2 = numpy.zeros((numpy.sum(Nd), ), dtype=numpy.float32)
    vec2[0:Nd[0]] = vec
    vec2[Nd[0]:(Nd[0] + Nd[1])] = vec
    g_vec = thr.to_device(vec2)

    prg.cTensorMultiply(numpy.uint32(1),
                        numpy.uint32(len(Nd)),
                        g_Nd,
                        g_Nd_elements,
                        g_invNd_elements,
                        g_vec,
                        g_image,
                        local_size=None, global_size=int(prodNd))

    thr.synchronize()
    return g_image

Exemple #14

0

Afficher le fichier

Fichier : gpuDFTInterpolator.py Projet : TanjaDorst/datareceiver

def main():
    """Prepare a reikna FFT from a float32 input to a complex64 output.

    Creates an OpenCL thread using ``TrivialManager`` for temporary
    allocations, uploads 256 random float32 samples, allocates a complex64
    output of the same shape, connects ``tr`` to the FFT's ``input`` and
    calls ``prepare_for`` along axis 0.

    NOTE(review): ``tr`` is not defined in this function; presumably a
    module-level transformation -- confirm.
    """
    thr = cluda.ocl_api().Thread.create(temp_alloc=dict(cls=TrivialManager))

    N = 256
    M = 10000

    data_in = np.random.rand(N).astype(np.float32)

    cl_data_in = thr.to_device(data_in)
    cl_data_out = thr.array(data_in.shape, np.complex64)

    fft = FFT(thr)
    fft.connect(tr, 'input', ['input_re'])
    fft.prepare_for(cl_data_out, cl_data_in, -1, axes=(0, ))

Exemple #15

0

Afficher le fichier

Fichier : split_potentials.py Projet : fjarri/thesis

def run_pass(trajectories=128):
    """Run one fixed-step Wigner integration of the split system.

    Builds a ground state for ``system_init`` with ``ImaginaryTimeGroundState``,
    converts it with ``to_wigner_coherent``, applies a beam splitter once and
    integrates ``system_split`` with ``Integrator.fixed_step`` while collecting
    five samplers.

    ``state_dtype``, ``grid``, ``system_init``, ``system_split``, ``cutoff``,
    ``N``, ``f_detuning``, ``f_rabi``, ``splitting_time``, ``steps`` and
    ``samples`` are not defined here; they come from the enclosing module.

    :param trajectories: number of trajectories passed to
        ``to_wigner_coherent`` and to the integrator.
    :returns: the ``(result, info)`` pair from ``integrator.fixed_step``.
    """

    api = cluda.ocl_api()
    thr = api.Thread.create()

    # Fixed seed; the two randint() draws below happen in this order.
    rng = numpy.random.RandomState(1234)

    gs_gen = ImaginaryTimeGroundState(thr, state_dtype, grid, system_init,
        stepper_cls=RK46NLStepper, cutoff=cutoff)

    # Ground state
    psi = gs_gen([N, 0], E_diff=1e-7, E_conv=1e-9, sample_time=1e-6)

    # Initial noise (first seed drawn from rng)
    psi = psi.to_wigner_coherent(trajectories, seed=rng.randint(0, 2**32-1))

    # Second seed drawn from rng goes to the integrator.
    integrator = Integrator(
        psi, system_split,
        trajectories=trajectories,
        stepper_cls=RK46NLStepper, cutoff=cutoff,
        wigner=True, seed=rng.randint(0, 2**32-1))

    # Prepare samplers
    bs = BeamSplitter(psi, f_detuning=f_detuning, f_rabi=f_rabi)
    n_bs_sampler = PopulationSampler(psi, beam_splitter=bs, theta=numpy.pi / 2)
    n_sampler = PopulationSampler(psi)
    i_sampler = InteractionSampler(psi)
    v_sampler = VisibilitySampler(psi)
    v_sampler.no_values = True
    ax_sampler = Density1DSampler(psi, axis=2)
    ax_sampler.no_values = True

    samplers = dict(
        N=n_sampler, I=i_sampler, V=v_sampler,
        N_bs=n_bs_sampler, axial_density=ax_sampler)

    # Integrate (the beam splitter is applied to psi.data once, beforehand)
    bs(psi.data, 0, numpy.pi / 2)

    result, info = integrator.fixed_step(
        psi, 0, splitting_time, steps, samples=samples,
        samplers=samplers, weak_convergence=['N', 'I', 'V'])

    return result, info

Exemple #16

0

Afficher le fichier

Fichier : reikna_test.py Projet : csfoo/TF_binding

def main():
    """Multiply two random float32 matrices with reikna's ``MatrixMul``.

    Prints the thread object, runs a (100, 200) x (200, 100) product on the
    device and prints the numpy reference product.
    """
    api = cluda.ocl_api()
    thr = api.Thread.create()
    # print() with a single argument behaves the same on Python 2 and 3.
    print(thr)
    shape1 = (100, 200)
    shape2 = (200, 100)

    a = numpy.random.randn(*shape1).astype(numpy.float32)
    b = numpy.random.randn(*shape2).astype(numpy.float32)
    a_dev = thr.to_device(a)
    b_dev = thr.to_device(b)
    res_dev = thr.array((shape1[0], shape2[1]), dtype=numpy.float32)

    dot = MatrixMul(a_dev, b_dev, out_arr=res_dev)
    dotc = dot.compile(thr)
    dotc(res_dev, a_dev, b_dev)

    res_reference = numpy.dot(a, b)
    print(res_reference)

Exemple #17

0

Afficher le fichier

    def __init__(self, **ctx_kw_args):
        print("""
\t############ WELCOME TO CHIMERA.CL ############
""")
        if ctx_kw_args == {}:
            print("""
\t  CONTEXT IS NOT CHOSEN, PLEASE, DO IT NOW.
\t  TO AVOID BEING ASKED IN THE FUTURE, YOU MAY
\t  SPECIFY ARGUMENT OF COMMUNICATOR, e.g.
\t    comm = Communicator(answers=[0,2])
\t################################################
""")
            ctx_kw_args['interactive'] = True
        self.ctx = create_some_context(**ctx_kw_args)
        self.queue = CommandQueue(self.ctx)

        api = ocl_api()
        self.thr = api.Thread(cqd=self.queue)

        selected_dev = self.queue.device
        self.dev_type = device_type.to_string(selected_dev.type)
        self.dev_name = self.queue.device.name
        self.plat_name = selected_dev.platform.vendor
        self.ocl_version = selected_dev.opencl_c_version

        print("""
\t  {} DEVICE {} IS CHOSEN
\t  ON {} PLATFORM
\t  WITH {} COMPILER
""".format(self.dev_type, self.dev_name, self.plat_name, self.ocl_version))

        if self.dev_type == 'CPU' and self.plat_name == 'Apple':
            print('\t\tReikna FFT is replaced by pyFFTW')
            self.fft_method = 'pyFFTW'
        else:
            self.fft_method = 'Reikna'

        if self.dev_type == 'CPU':
            print('\t\tReikna MatrixMul is replaced by numpy.dot')
            self.dot_method = 'NumPy'
        else:
            self.dot_method = 'Reikna'

Exemple #18

0

Afficher le fichier

def main():
    """Time a 6000x3000 by 3000x6000 float32 product on the device and in numpy.

    Prints the accumulated device time over 10 runs, the accumulated
    ``numpy.dot`` time over 10 runs, and whether the relative error of the
    device result is below 1e-6.
    """
    api = cluda.ocl_api()
    # Filters must be passed by keyword: the first positional parameter of
    # Thread.create is ``interactive``, so a positional dict would switch on
    # interactive selection instead of excluding the device.
    thr = api.Thread.create(device_filters={'exclude_devices': ['Iris Pro']})

    n = 6000
    m = 3000

    shape1 = (n, m)
    shape2 = (m, n)

    a = numpy.random.randn(*shape1).astype(numpy.float32)
    b = numpy.random.randn(*shape2).astype(numpy.float32)

    a_dev = thr.to_device(a)
    b_dev = thr.to_device(b)
    res_dev = thr.array((shape1[0], shape2[1]), dtype=numpy.float32)

    dot = MatrixMul(a_dev, b_dev, out_arr=res_dev)
    dotc = dot.compile(thr)

    gt = 0
    for _ in range(10):
        # Synchronize on both sides so only the kernel run is timed.
        thr.synchronize()
        gpu_start = time.time()
        dotc(res_dev, a_dev, b_dev)
        thr.synchronize()
        gt += time.time() - gpu_start
    print(gt)

    ct = 0
    res_reference = None
    for _ in range(10):
        t = time.time()
        res_reference = numpy.dot(a, b)
        ct += time.time() - t
    print(ct)

    print(norm(res_dev.get() - res_reference) / norm(res_reference) < 1e-6)

Exemple #19

0

Afficher le fichier

def initialize_gpu(backend, **kwargs):
    '''
    Create a GPU context for the requested backend.

    :param backend: backend to use; must compare equal to ``CUDA`` or ``OPENCL``.
    :type backend: str
    :param kwargs: options forwarded to ``device_lookup`` (see below).
    :type kwargs: dict
    :raises ValueError: if *backend* is neither ``CUDA`` nor ``OPENCL``.

    * *interactive*: (bool) whether to select the device manually
       (defaults to False).

    * *device*: (int) number of the device to use (defaults to None).

    .. note:: The device can be selected using the MINKIT_DEVICE environment variable.
    '''
    from reikna import cluda

    if backend == CUDA:
        api = cluda.cuda_api()
    elif backend == OPENCL:
        api = cluda.ocl_api()
    else:
        raise ValueError(f'Unknown backend type "{backend}"')

    # Flatten every (platform, device) pair into one candidate list.
    candidates = []
    for plat in api.get_platforms():
        for dev in plat.get_devices():
            candidates.append((plat, dev))

    # Let the lookup helper pick the index of the device to use.
    chosen = device_lookup(candidates, **kwargs)
    platform, device = candidates[chosen]

    logger.info(
        f'Selected device "{device.name}" ({chosen}) (platform: {platform.name})'
    )

    return Context(api, device, backend)

Exemple #20

0

Afficher le fichier

Fichier : reikna_test.py Projet : TomWerner/hpelm

def run():
    """Check reikna's ``MatrixMul`` against ``numpy.dot`` on 3000x3000 inputs.

    Prints ``True`` when the relative error of the device product is below
    1e-6, otherwise ``False``.
    """
    thr = cluda.ocl_api().Thread.create()

    n = 3000
    lhs_shape = (n, n)
    rhs_shape = (n, n)

    lhs = numpy.random.randn(*lhs_shape).astype(numpy.float32)
    rhs = numpy.random.randn(*rhs_shape).astype(numpy.float32)
    lhs_dev = thr.to_device(lhs)
    rhs_dev = thr.to_device(rhs)
    out_dev = thr.array((lhs_shape[0], rhs_shape[1]), dtype=numpy.float32)

    compiled = MatrixMul(lhs_dev, rhs_dev, out_arr=out_dev).compile(thr)
    compiled(out_dev, lhs_dev, rhs_dev)

    expected = numpy.dot(lhs, rhs)
    relative_error = norm(out_dev.get() - expected) / norm(expected)

    print(relative_error < 1e-6)

Exemple #21

0

Afficher le fichier

Fichier : generate_two_comp_gs.py Projet : fjarri/thesis

def get_gs(N, a12):
    """Compute a two-component ground-state profile.

    Builds a two-component system in a harmonic potential on a 32x32x128
    uniform grid, using ``a12`` as the off-diagonal entry of the 2x2 matrix
    passed to ``const.scattering_3d``, and computes the ``it_ground_state``
    with ``N / 2`` atoms per component.

    :returns: ``(xs, n_x)`` as plain lists, where ``xs`` is ``grid.xs[2]``
        and ``n_x`` is the squared magnitude of component 0 summed over axes
        1 and 2 and scaled by ``grid.dxs[0] * grid.dxs[1]``.
    """
    thr = cluda.ocl_api().Thread.create()

    components = [const.rb87_1_minus1, const.rb87_2_1]
    trap_freqs = (97.6, 97.6, 11.96)
    grid_shape = (32, 32, 128)
    state_dtype = numpy.complex128

    lengths = numpy.array([[100.4, a12], [a12, 95.44]])
    scattering = const.scattering_3d(lengths, components[0].m)
    system = System(
        components, scattering, potential=HarmonicPotential(trap_freqs))
    grid = UniformGrid(grid_shape, box_for_tf(system, 0, N))

    per_component = N / 2
    ground = it_ground_state(
        thr, grid, state_dtype, system, [per_component, per_component],
        E_diff=1e-7, E_conv=1e-9, sample_time=1e-4)

    density = numpy.abs(ground.data.get()) ** 2
    n_x = density[:, 0].sum((1, 2)) * grid.dxs[0] * grid.dxs[1]

    return grid.xs[2].tolist(), n_x.tolist()

Exemple #22

0

Afficher le fichier

Fichier : nufft_hsa_legacy.py Projet : suiy02/pynufft

    def __init__(self, API = None, platform_number=None, device_number=None):
        """
        Constructor.

        Selects the compute API, platform and device, creates the reikna
        thread on that device and compiles the kernel set.

        :param API: The API for the heterogeneous system, 'cuda' or 'ocl'.
            When None, the result of ``helper.diagnose()`` decides.
        :param platform_number: Index of the platform found by the API
            (defaults to 0).
        :param device_number: Index of the device found on the platform
            (defaults to 0).
        :type API: string
        :type platform_number: integer
        :type device_number: integer
        :raises ValueError: if no usable API name could be determined.

        Any exception raised while looking up the API, platform or device is
        re-raised after a diagnostic message is printed.

        :Example:

        >>> import pynufft
        >>> NufftObj = pynufft.NUFFT_hsa(API='cuda', platform_number=0,
        ...                              device_number=0)
        """
        self.dtype = numpy.complex64

        from reikna import cluda

        print('API = ', API)
        self.cuda_flag, self.ocl_flag = helper.diagnose()
        if API is None:
            # Compare by value: ``is 1`` relies on small-int identity.
            if self.cuda_flag == 1:
                API = 'cuda'
            elif self.ocl_flag == 1:
                API = 'ocl'
            else:
                print('No accelerator is available.')
        print('now using API = ', API)
        if platform_number is None:
            platform_number = 0
        if device_number is None:
            device_number = 0

        try:
            # Create api/platform/device from the given parameters.
            if API == 'cuda':
                api = cluda.cuda_api()
            elif API == 'ocl':
                api = cluda.ocl_api()
            else:
                raise ValueError(
                    "API must be 'cuda' or 'ocl', got %r" % (API,))

            platform = api.get_platforms()[platform_number]
            device = platform.get_devices()[device_number]
        except Exception:
            # Without a device nothing below can work; report and propagate
            # the real cause instead of failing later on an unbound name.
            print('No accelerator is detected.')
            raise

        # Create the thread (context + queue) on the selected device.
        self.thr = api.Thread(device)
        print('Using opencl or cuda = ', self.thr.api)

        # The device's warp size is passed to the kernels as ``LL``.
        self.wavefront = api.DeviceParameters(device).warp_size

        print('wavefront of OpenCL (as warp in CUDA) = ', self.wavefront)

        from ..src.re_subroutine import create_kernel_sets
        kernel_sets = create_kernel_sets(API)

        prg = self.thr.compile(kernel_sets,
                               render_kwds=dict(LL=str(self.wavefront)),
                               fast_math=False)
        self.prg = prg

        print("Note: In the future the api will change!")
        print("You have been warned!")

Exemple #23

0

Afficher le fichier

Fichier : test_soliton.py Projet : fjarri-attic/vienna_simulation

def test_soliton():
    """Integrate a noisy 1-D soliton and check convergence and conservation.

    Builds a ``soliton_height / cosh(x - soliton_shift)`` profile tiled over
    ``ensembles`` paths, adds complex Gaussian noise transformed with an
    inverse FFT, integrates it on an OpenCL thread with ``Integrator`` and a
    ``CollectorWigner1D`` collector, then asserts bounds on the reported
    errors and on the relative drift of ``N``.  Finally calls
    ``plot_soliton`` with the collected density.
    """

    seed = 31415926 # random seed
    modes = 128 # spatial lattice points
    L_trap = 14. # spatial domain
    ensembles = 64 # simulation paths
    gamma = 0.1
    t = 2.5 # time interval
    samples = 100 # how many samples to take during simulation
    steps = samples * 400 # number of time steps (should be multiple of samples)
    v = 40.0 # strength of the potential
    soliton_height = 10.0
    soliton_shift = 1.0
    dtype = numpy.complex128

    problem_shape = (modes,)
    shape = (1, ensembles) + problem_shape
    box = (L_trap,)
    dV = L_trap / modes
    # Cell-centred grid spanning [-L_trap/2, L_trap/2].
    xgrid = numpy.linspace(-L_trap/2 + dV/2, L_trap/2 - dV/2, modes)

    api = ocl_api()
    #device = api.get_platforms()[0].get_devices()[1]
    #thr = api.Thread(device)
    thr = api.Thread.create()

    interaction = numpy.array([[gamma]])
    tunneling = [(0, 0)]
    nonlinear_module = get_nonlinear(dtype, interaction, tunneling)
    potential = v * xgrid ** 2

    # Uninitialised array handed to Integrator; psi is rebuilt below.
    psi = numpy.empty(shape, dtype)

    integrator = Integrator(thr, psi, box, t, steps, samples,
        kinetic_coeff=0.5,
        nonlinear_module=nonlinear_module,
        potentials=potential)


    # Classical ground state
    psi = soliton_height / numpy.cosh(xgrid - soliton_shift)
    psi = psi.reshape(1, 1, *psi.shape).astype(dtype)
    psi = numpy.tile(psi, (1, ensembles, 1))

    # To Wigner
    # NOTE: the noise generator uses a fixed seed of 456; the ``seed``
    # variable defined above is not used in this function.
    rs = numpy.random.RandomState(seed=456)
    normals = rs.normal(size=(2,) + shape, scale=numpy.sqrt(0.5))
    noise_kspace = numpy.sqrt(0.5) * (normals[0] + 1j * normals[1])

    fft_scale = numpy.sqrt(dV / product(problem_shape))
    psi += numpy.fft.ifftn(noise_kspace, axes=range(2, len(shape))) / fft_scale

    psi_dev = thr.to_device(psi)
    collector = CollectorWigner1D(dV)
    results = integrator(psi_dev, [collector])

    print("Errors:", results.errors)
    # TODO: what causes the errors this big? there seems to be plenty of time steps
    assert results.errors['density'] < 1e-4
    assert results.errors['psi_strong_mean'] < 0.01
    assert results.errors['psi_strong_max'] < 0.01

    # Check that the population stayed constant
    N_total = results.values['N']
    # Not using N, since the initial value can differ slightly (due to initial sampling)
    N_diff = (N_total - N_total[0]) / N_total[0]
    assert numpy.abs(N_diff).max() < 1e-5

    plot_soliton(results.values['density'], L_trap, soliton_height ** 2, t)

Exemple #24

0

Afficher le fichier

import time
import numpy as np
from numpy.linalg import norm
import sys
import reikna.cluda as cluda
from reikna.matrixmul import MatrixMul

# Module-level device setup; runs at import time.
api = cluda.ocl_api()
# NOTE(review): the two unpackings below require exactly one platform that
# exposes exactly two devices, in (GPU, CPU) order -- confirm for the target
# machine.
amd, = api.get_platforms()
gpu_dev, cpu_dev = amd.get_devices()
thr = api.Thread(cpu_dev)
# %-formatting gives the same output on Python 2 and 3 (the original was a
# Python 2 print statement).
print('USING DEVICE:  %s' % (thr._device,))


def main(M=512, N=512, K=512, seed=0, dtype=np.float32):
    """Prepare a matrix-multiplication computation on the module-level ``thr``.

    Random operands ``A`` (M x K), ``B`` (K x N) and an output buffer ``C``
    (M x N) are generated from a seeded ``RandomState``, uploaded to the
    device and used to build and compile a ``MatrixMul`` computation.

    NOTE(review): this listing appears to be cut off -- ``C_reference``,
    ``dotc`` and ``FLOPS`` are computed but never used in the visible code,
    and the final loop only zeroes the host copy of ``C``.

    :param M: number of rows of ``A`` and ``C``.
    :param N: number of columns of ``B`` and ``C``.
    :param K: shared inner dimension of ``A`` and ``B``.
    :param seed: seed for ``numpy.random.RandomState``.
    :param dtype: element type the random arrays are converted to.
    """
    rng = np.random.RandomState(seed)
    A = np.asarray(rng.normal(size=(M, K)), dtype=dtype)
    B = np.asarray(rng.normal(size=(K, N)), dtype=dtype)
    C = np.asarray(rng.normal(size=(M, N)), dtype=dtype)
    # Host-side reference product.
    C_reference = np.dot(A, B)
    # Upload the operands and the output buffer to the device.
    a_dev = thr.to_device(A)
    b_dev = thr.to_device(B)
    c_dev = thr.to_device(C)

    # Build the computation from the device arrays and compile it for `thr`.
    dot = MatrixMul(a_dev, b_dev, out_arr=c_dev)
    dotc = dot.compile(thr)

    FLOPS = M * N * (K + 1) * 2

    for i in range(5):
        # Clears the host array only; the device copy `c_dev` is untouched.
        C[:] = 0

Exemple #25

0

Afficher le fichier

Fichier : nufft_hsa.py Projet : suiy02/pynufft

    def __init__(self,
                 API=None,
                 platform_number=None,
                 device_number=None,
                 verbosity=0):
        """
        Constructor.

        Selects a reikna API, platform and device, creates a Thread on that
        device and compiles the kernel set into ``self.prg``.

        :param API: The API for the heterogeneous system. API='cuda'
                    or API='ocl'. When None, the API is chosen from the
                    flags returned by ``helper.diagnose`` (cuda first).
        :param platform_number: The number of the platform found by the API
                    (None is treated as 0).
        :param device_number: The number of the device found on the platform
                    (None is treated as 0).
        :param verbosity: Defines the verbosity level, default value is 0
        :type API: string
        :type platform_number: integer
        :type device_number: integer
        :type verbosity: integer
        :raises ValueError: if no usable API name is available.
        :raises Exception: any error raised while creating the
                    api/platform/device is re-raised after a UserWarning.

        :Example:

        >>> import pynufft
        >>> NufftObj = pynufft.NUFFT_hsa(API='cuda', platform_number=0,
        ...                              device_number=0, verbosity=0)
        """
        warnings.warn(
            'In the future NUFFT_hsa and NUFFT_cpu api will'
            ' be merged', FutureWarning)
        self.dtype = numpy.complex64
        self.verbosity = verbosity

        import reikna.cluda as cluda
        if self.verbosity > 0:
            print('The choosen API by the user is ', API)
        self.cuda_flag, self.ocl_flag = helper.diagnose(
            verbosity=self.verbosity)
        if API is None:
            # Compare by value: `is 1` only works by accident of CPython's
            # small-integer caching and fails for non-int flag objects.
            if self.cuda_flag == 1:
                API = 'cuda'
            elif self.ocl_flag == 1:
                API = 'ocl'
            else:
                warnings.warn(
                    'No parallelization will be made since no GPU '
                    'device has been detected.', UserWarning)
        if self.verbosity > 0:
            print('The used API will be ', API)
        if platform_number is None:
            platform_number = 0
        if device_number is None:
            device_number = 0

        import reikna.transformations
        from reikna.cluda import functions, dtypes
        try:  # try to create api/platform/device using the given parameters
            if 'cuda' == API:
                api = cluda.cuda_api()
            elif 'ocl' == API:
                api = cluda.ocl_api()
            else:
                raise ValueError(
                    "API must be 'cuda' or 'ocl', got %r" % (API,))

            platform = api.get_platforms()[platform_number]

            device = platform.get_devices()[device_number]
        except Exception:
            warnings.warn(
                'No parallelization will be made since no GPU '
                'device has been detected.', UserWarning)
            # Nothing below can work without `api` and `device`; surface the
            # real cause instead of a later UnboundLocalError.
            raise

        # Create the reikna Thread on the selected device
        self.thr = api.Thread(device)
        self.device = device
        if self.verbosity > 0:
            print('Using opencl or cuda = ', self.thr.api)

        # The device's warp size is stored as `wavefront` and handed to the
        # kernel templates below as the string render keyword LL.
        self.wavefront = api.DeviceParameters(device).warp_size
        if self.verbosity > 0:
            print('Wavefront of OpenCL (as wrap of CUDA) = ', self.wavefront)

        from ..src import re_subroutine  # import create_kernel_sets
        kernel_sets = re_subroutine.create_kernel_sets(API)

        prg = self.thr.compile(kernel_sets,
                               render_kwds=dict(LL=str(self.wavefront)),
                               fast_math=False)
        self.prg = prg

Exemple #26

0

Afficher le fichier

        device for device in Device.__dict__.keys() if device[0] != "_"
]:
    setattr(Tensor, f"{device.lower()}",
            functools.partialmethod(Tensor.to, Device.__dict__[device]))
    setattr(Tensor, f"{device.lower()}_",
            functools.partialmethod(Tensor.to_, Device.__dict__[device]))


# this registers all the operations
def _register_ops(namespace, device=Device.CPU):
    """Register every public class found in ``namespace`` for ``device``.

    Each class whose name does not start with an underscore is passed to
    ``register`` under its lower-cased name.
    """
    for op_name, op_cls in inspect.getmembers(namespace, inspect.isclass):
        # Underscore-prefixed classes are private helpers, not operations.
        if op_name[0] == "_":
            continue
        register(op_name.lower(), op_cls, device=device)


from tinygrad import ops_cpu

# Register the CPU operations (uses the default device of _register_ops).
_register_ops(ops_cpu)
try:
    # GPU support is optional: a failed import of reikna or of the GPU ops
    # module lands in the ImportError handler below.
    import reikna.cluda as cluda
    from tinygrad import ops_gpu

    _register_ops(ops_gpu, device=Device.GPU)
    # The GPAPI environment variable picks the reikna API: "cuda" selects
    # CUDA, any other value (default "opencl") selects OpenCL.
    api = cluda.cuda_api() if os.environ.get(
        "GPAPI", "opencl") == "cuda" else cluda.ocl_api()
    thr = api.Thread.create()
    GPU = True
except ImportError:
    # no GPU support
    # NOTE(review): only ImportError is handled; any other exception raised
    # while creating the API or the Thread propagates at import time.
    GPU = False
ANE = False

Exemple #27

0

Afficher le fichier

    def __init__(self,
                 inputSize,
                 axes=(-1, ),
                 mode="pyfftw",
                 dtype="complex64",
                 direction="FORWARD",
                 fftw_FLAGS=("FFTW_MEASURE", "FFTW_DESTROY_INPUT"),
                 THREADS=None,
                 loggingLevel=None):
        """Choose an FFT backend and allocate its buffers and plan.

        The requested ``mode`` falls back through the chain
        gpu_ocl -> gpu_cuda -> pyfftw -> scipy whenever a backend is
        unavailable. The backend finally selected is stored in
        ``self.FFTMODE`` ("gpu", "pyfftw" or "scipy"); it stays ``None``
        when no backend could be selected.

        :param inputSize: shape of the input (and output) arrays.
        :param axes: axes handed to the backend's FFT plan.
        :param mode: "gpu" (alias of "gpu_ocl"), "gpu_ocl", "gpu_cuda",
            "pyfftw" or "scipy".
        :param dtype: dtype of the allocated arrays.
        :param direction: "FORWARD" or "BACKWARD".
        :param fftw_FLAGS: planner flags passed to ``pyfftw.FFTW``.
        :param THREADS: thread count for pyfftw; ``cpu_count()`` when None.
        :param loggingLevel: if truthy, passed to ``logger.setLoggingLevel``.
        """
        self.axes = axes
        self.direction = direction
        # Defined up front so the backend checks below never hit an
        # AttributeError when every backend turns out to be unavailable.
        self.FFTMODE = None

        if loggingLevel:
            logger.setLoggingLevel(loggingLevel)

        if mode in ("gpu", "gpu_ocl", "gpu_cuda"):
            if mode == "gpu":
                mode = "gpu_ocl"
            if REIKNA_AVAILABLE:
                if mode == "gpu_ocl":
                    try:
                        reikna_api = cluda.ocl_api()
                        self.reikna_thread = reikna_api.Thread.create()
                        self.FFTMODE = "gpu"
                    except Exception:
                        logger.warning(
                            "no reikna opencl available. will try cuda")
                        mode = "gpu_cuda"
                if mode == "gpu_cuda":
                    try:
                        reikna_api = cluda.cuda_api()
                        self.reikna_thread = reikna_api.Thread.create()
                        self.FFTMODE = "gpu"
                    except Exception:
                        logger.warning(
                            "no cuda available. Switching to pyfftw")
                        mode = "pyfftw"
            else:
                logger.warning(
                    "No gpu algorithms available switching to pyfftw")
                mode = "pyfftw"

        if mode == "pyfftw":
            if PYFFTW_AVAILABLE:
                self.FFTMODE = "pyfftw"
            else:
                logger.warning(
                    "No pyfftw available. Defaulting to scipy.fftpack")
                mode = "scipy"

        if mode == "scipy":
            if SCIPY_AVAILABLE:
                self.FFTMODE = "scipy"
            else:
                logger.warning("No scipy available - fft won't function.")

        if self.FFTMODE == "gpu":
            # NOTE(review): FORWARD maps to inverse=1 and BACKWARD to
            # inverse=0 -- confirm this matches how the flag is consumed.
            if direction == "FORWARD":
                self.inverse = 1
            elif direction == "BACKWARD":
                self.inverse = 0

            self.inputData = numpy.zeros(inputSize, dtype=dtype)
            inputData_dev = self.reikna_thread.to_device(self.inputData)
            self.outputData_dev = self.reikna_thread.array(inputSize,
                                                           dtype=dtype)

            logger.info("Generating and compiling reikna gpu fft plan...")
            reikna_ft = reikna.fft.FFT(inputData_dev, axes=axes)
            self.reikna_ft_c = reikna_ft.compile(self.reikna_thread)
            logger.info("Done!")

        if self.FFTMODE == "pyfftw":
            if THREADS is None:
                THREADS = cpu_count()

            # fftw_FLAGS set the optimisation level of fftw3
            # (more optimisation takes longer - but gives quicker ffts.)
            # Can be FFTW_ESTIMATE, FFTW_MEASURE, FFT_PATIENT, FFTW_EXHAUSTIVE
            n = pyfftw.simd_alignment

            self.inputData = pyfftw.n_byte_align_empty(inputSize, n, dtype)
            self.inputData[:] = numpy.zeros(inputSize, dtype=dtype)
            self.outputData = pyfftw.n_byte_align_empty(inputSize, n, dtype)
            self.outputData[:] = numpy.zeros(inputSize, dtype=dtype)

            logger.info(
                "Generating fftw3 plan....\nIf this takes too long, change fftw_FLAGS."
            )
            logger.debug("currently set to: {}".format(fftw_FLAGS))
            if direction == "FORWARD":
                self.fftwPlan = pyfftw.FFTW(self.inputData,
                                            self.outputData,
                                            axes=axes,
                                            threads=THREADS,
                                            flags=fftw_FLAGS)
            elif direction == "BACKWARD":
                self.fftwPlan = pyfftw.FFTW(self.inputData,
                                            self.outputData,
                                            direction='FFTW_BACKWARD',
                                            axes=axes,
                                            threads=THREADS,
                                            flags=fftw_FLAGS)
            logger.info("Done!")

        elif self.FFTMODE == "scipy":

            self.inputData = numpy.zeros(inputSize, dtype=dtype)
            # Length of the input along each transformed axis.
            self.size = [inputSize[axis] for axis in self.axes]

Exemple #28

0

Afficher le fichier

Fichier : visibility.py Projet : fjarri/thesis

def calculate_ramsey(pulse_theta_noise=0, wigner=False, echo=False, t=1.0,
        steps=20000, samples=100, N=55000, trajectories=1, shape=(8, 8, 64),
        losses_enabled=True):
    """
    Run a Ramsey sequence (optionally with a spin-echo pulse) and sample it.

    A pi/2 beam-splitter pulse is applied at time 0, then the state is
    integrated up to ``t`` with fixed steps. With ``echo`` the integration is
    split into two halves with a pi pulse applied at ``t / 2``. For ``t <= 0``
    the state is only sampled once, at time 0.

    :param pulse_theta_noise: passed as ``theta_noise`` to every
        beam-splitter pulse.
    :param wigner: if True, the state is converted with
        ``to_wigner_coherent`` and the integrator runs in Wigner mode.
    :param echo: if True, apply a pi pulse at ``t / 2``.
    :param t: total integration time.
    :param steps: number of integration steps (halved per leg with echo).
    :param samples: number of samples (halved per leg with echo, minimum 1).
    :param N: not used in the calculation; echoed in the returned dict.
    :param trajectories: number of trajectories.
    :param shape: grid shape.
    :param losses_enabled: if True, use the built-in loss terms.
    :returns: dict with keys ``result``, ``weak_errors``, ``psi_type``,
        ``N``, ``steps``, ``shape``, ``box`` and ``wigner``.
    """

    api = cluda.ocl_api()
    thr = api.Thread.create()

    f_detuning = 37
    f_rabi = 350
    state_dtype = numpy.complex128

    rng = numpy.random.RandomState(1234)

    freqs = (97.0, 97.0 * 1.03, 11.69)
    components = [const.rb87_1_minus1, const.rb87_2_1]

    scattering = const.scattering_3d(numpy.array([[100.4, 98.0], [98.0, 95.44]]), components[0].m)

    if losses_enabled:
        losses = [
            (5.4e-42 / 6, (3, 0)),
            (8.1e-20 / 4, (0, 2)),
            (1.51e-20 / 2, (1, 1)),
            ]
    else:
        losses = None

    potential = HarmonicPotential(freqs)
    system = System(components, scattering, potential=potential, losses=losses)

    # Initial state.
    # Pickle data is binary: the file must be opened in 'rb' mode, otherwise
    # pickle.load fails on Python 3.
    # NOTE(review): the file name is fixed to the 8-8-64 grid regardless of
    # the `shape` argument -- confirm that other shapes are not expected.
    with open('ground_states/ground_state_8-8-64_1-1-1.pickle', 'rb') as f:
        data = pickle.load(f)

    psi_gs = data['data']
    box = data['box']
    grid = UniformGrid(shape, box)
    print(grid.shape, grid.box)

    psi = WavefunctionSet(thr, state_dtype, grid, components=2)

    # Two-component state
    psi.fill_with(psi_gs)

    # Initial noise
    if wigner:
        psi = psi.to_wigner_coherent(trajectories, seed=rng.randint(0, 2**32-1))

    bs = BeamSplitter(psi, f_detuning=f_detuning, f_rabi=f_rabi, seed=rng.randint(0, 2**32-1))

    integrator = Integrator(
        psi, system,
        trajectories=trajectories, stepper_cls=RK46NLStepper,
        wigner=wigner, seed=rng.randint(0, 2**32-1))

    # Integrate
    n_bs_sampler = PopulationSampler(psi, beam_splitter=bs, theta=numpy.pi/2)
    n_sampler = PopulationSampler(psi)
    i_sampler = InteractionSampler(psi)
    v_sampler = VisibilitySampler(psi)

    samplers = dict(N_bs=n_bs_sampler, N=n_sampler, V=v_sampler, I=i_sampler)
    weak_convergence = ['N', 'V', 'N_bs', 'I']

    # First pi/2 pulse at time 0
    bs(psi.data, 0, numpy.pi / 2, theta_noise=pulse_theta_noise)

    if t > 0:
        if echo:
            # Each leg gets half of the steps and half of the samples
            result1, info1 = integrator.fixed_step(
                psi, 0, t / 2, steps // 2, samples=samples // 2 if samples > 1 else 1,
                samplers=samplers, weak_convergence=weak_convergence)
            bs(psi.data, t / 2, numpy.pi, theta_noise=pulse_theta_noise)
            result2, info2 = integrator.fixed_step(
                psi, t / 2, t, steps // 2, samples=samples // 2 if samples > 1 else 1,
                samplers=samplers, weak_convergence=weak_convergence)

            result = concatenate_results(result1, result2)
            # Only the second leg's convergence info is reported
            info = info2
        else:
            result, info = integrator.fixed_step(
                psi, 0, t, steps, samples=samples, samplers=samplers,
                weak_convergence=weak_convergence)
        weak_errors = info.weak_errors
    else:
        # No evolution: sample once at time 0 and give every array a leading
        # axis of length 1.
        sampled, _ = sample(psi.data, 0, samplers)
        result = {}
        for key in sampled:
            result[key] = dict(
                trajectories=trajectories,
                time=numpy.array([0]))
            for subkey in ('mean', 'values', 'stderr'):
                if subkey in sampled[key]:
                    result[key][subkey] = sampled[key][subkey].reshape(1, *sampled[key][subkey].shape)
        weak_errors = {key:0 for key in weak_convergence}

    psi_type = 'wigner' if wigner else 'wavefunction'

    return dict(result=result, weak_errors=weak_errors, psi_type=psi_type,
        N=N, steps=steps,
        shape=grid.shape, box=grid.box,
        wigner=wigner)

Exemple #29

0

Afficher le fichier

Fichier : wigner.py Projet : fjarri-attic/early-universe

def run(dims, initial='same', gamma=1, nu=0, L_trap=10., samples=100,
        t=10, steps=5000, modes=512, ensembles=10, noise='coherent', T=0, U12=0):
    """
    Runs the simulation for

        i dpsi_j/dt = -nabla^2 psi_j / 2 + gamma |psi_j|^2 psi_j - nu psi_{1-j}

    With the initial state psi = 1 / sqrt(gamma)

    :param dims: number of spatial dimensions (must be 1 for
        ``noise='bogolyubov'``).
    :param initial: 'same' or 'opposite'; 'opposite' negates the second
        component of the initial state.
    :param noise: 'coherent' or 'bogolyubov' initial k-space noise.
    :param modes: number of grid points per dimension.
    :param ensembles: number of ensembles (second axis of the state array).
    :returns: the transposed collector results, updated with ``errors`` and
        the run parameters ``L_trap``, ``t``, ``steps``, ``nu``, ``gamma``,
        ``samples`` and ``ensembles``.
    """
    assert initial in ('same', 'opposite')
    assert noise in ('coherent', 'bogolyubov')
    if noise == 'bogolyubov':
        assert dims == 1

    problem_shape = (modes,) * dims
    shape = (2, ensembles) + problem_shape
    box = (L_trap,) * dims
    ds = [L / points for L, points in zip(box, problem_shape)]
    fft_scale = numpy.sqrt(product(ds) / product(problem_shape))

    api = ocl_api()
    # NOTE(review): hard-coded to the second device of the first platform.
    device = api.get_platforms()[0].get_devices()[1]
    thr = api.Thread(device)
    U = numpy.array([[gamma, U12 * gamma], [U12 * gamma, gamma]])
    integrator = Integrator(thr, shape, dtype, box, t, steps, samples,
        kinetic_coeff=0.5,
        nonlinear_module=nonlinear_no_potential(dtype, U, nu))

    psi = numpy.empty(shape, dtype)

    # Classical ground state

    psi.fill((1. / gamma) ** 0.5)

    if initial == 'opposite':
        psi[1] *= -1

    # To Wigner

    if noise == 'coherent':
        noise_kspace = numpy.sqrt(0.5) * (
            numpy.random.normal(size=shape, scale=numpy.sqrt(0.5))
            + 1j * numpy.random.normal(size=shape, scale=numpy.sqrt(0.5)))
    else:
        kgrid = numpy.fft.fftfreq(modes, L_trap / modes) * 2 * numpy.pi
        kgrid[0] = 1 # protection against warnings; we won't use this k anyway.

        # generate randoms
        Vk = T * (1. / (kgrid * numpy.sqrt(kgrid ** 2 + 2))) + 0.5
        Vk[0] = 0.5 # zeroth mode is just the vacuum mode
        betas = (
            numpy.random.normal(size=shape, scale=numpy.sqrt(0.5))
            + 1j * numpy.random.normal(size=shape, scale=numpy.sqrt(0.5))) * numpy.sqrt(Vk)

        phis = 0.5 * numpy.arctanh(1. / (kgrid ** 2 + 1))

        # Integer division: a float index (modes / 2 on Python 3) raises
        # TypeError when used in a slice.
        half = modes // 2

        betas_m = betas.copy()
        # NOTE(review): numpy.fliplr reverses axis 1, which for these 3-D
        # slices is the ensemble axis, not the mode axis -- confirm this is
        # the intended pairing of modes.
        betas_m[:,:,1:half] = numpy.fliplr(betas[:,:,half+1:])
        betas_m[:,:,half+1:] = numpy.fliplr(betas[:,:,1:half])
        noise_kspace = betas * numpy.cosh(phis) - betas_m.conj() * numpy.sinh(phis)
        noise_kspace[:,:,0] = betas[:,:,0] # zeroth mode is just the vacuum mode (no mixing)
        noise_kspace[:,:,half] = betas[:,:,half] # unmatched mode

    # Transform the noise over the spatial axes and add it to the state
    psi += numpy.fft.ifftn(noise_kspace, axes=range(2, len(shape))) / fft_scale
    psi = thr.to_device(psi)

    collector = CollectorWigner(ds)
    results, errors = integrator(psi, collector)
    results = transpose_results(results)

    results.update(dict(
        errors=errors,
        L_trap=L_trap, t=t, steps=steps, nu=nu, gamma=gamma,
        samples=samples, ensembles=ensembles))
    return results

Exemple #30

0

Afficher le fichier

def initialize_gpu(backend, **kwargs):
    '''
    Set up the module-level GPU state (backend, device, context and thread).

    :param backend: backend to use. It must be any of "cuda" or "opencl".
    :type backend: str
    :param kwargs: forwarded to the device lookup; it may contain
       "interactive" (bool, select the device manually, defaults to False)
       and "device" (int, number of the device to use, defaults to None).
    :type kwargs: dict
    :raises RuntimeError: if a different backend has already been set.
    :raises ValueError: if the backend is not recognised.

    .. note:: The device can be selected using the MINKIT_DEVICE environment variable.
    '''
    global BACKEND, DEVICE, CONTEXT, THREAD

    from reikna import cluda

    # Switching between backends once one is set is not supported
    if BACKEND is not None and backend != BACKEND:
        raise RuntimeError(
            f'Attempt to change backend from "{BACKEND}" to "{backend}"; not supported'
        )

    # Resolve the reikna API object for the requested backend
    if backend == CUDA:
        API = cluda.cuda_api()
    elif backend == OPENCL:
        API = cluda.ocl_api()
    elif backend == BACKEND:
        # Using same backend
        return
    else:
        raise ValueError(f'Unknown backend type "{backend}"')

    BACKEND = backend

    # Flatten every (platform, device) pair into a single list
    all_devices = []
    for plat in API.get_platforms():
        for dev in plat.get_devices():
            all_devices.append((plat, dev))

    # Determine the device to use
    idev = device_lookup(all_devices, **kwargs)
    chosen_platform, chosen_device = all_devices[idev]

    logger.info(
        f'Selected device "{chosen_device.name}" ({idev}) (platform: {chosen_platform.name})'
    )

    DEVICE = chosen_device

    # Create the context and thread
    if BACKEND != CUDA:
        # OPENCL
        import pyopencl
        CONTEXT = pyopencl.Context([DEVICE])
    else:
        CONTEXT = DEVICE.make_context()

        def _release_cuda_context():
            # Pop the context and clear the pycuda caches at interpreter exit
            from pycuda.tools import clear_context_caches
            CONTEXT.pop()
            clear_context_caches()

        atexit.register(_release_cuda_context)

    THREAD = API.Thread(CONTEXT)

Exemple #31

0

Afficher le fichier

    def offload(self, API, platform_number=0, device_number=0):
        """
        self.offload():

        Off-load NUFFT to the opencl or cuda device(s)

        Creates a reikna Thread on the selected device, compiles the kernels,
        uploads the scaling factors and sparse matrices to the device and
        compiles the FFT. The host-side sparse matrices ``self.sp``,
        ``self.spH`` and ``self.spHsp`` are deleted once uploaded.

        :param API: define the device type, which can be 'cuda' or 'ocl'
        :param platform_number: define which platform to be used. The default platform_number = 0.
        :param device_number: define which device to be used. The default device_number = 0.
        :type API: string
        :type platform_number: int
        :type device_number: int
        :return: 1 if the api/platform/device could not be created (after
                 running ``diagnose()``); None otherwise.

        """
        from reikna import cluda
        import reikna.transformations
        from reikna.cluda import functions, dtypes
        try: # try to create api/platform/device using the given parameters
            if 'cuda' == API:
                api = cluda.cuda_api()
            elif 'ocl' == API:
                api = cluda.ocl_api()

            platform = api.get_platforms()[platform_number]

            device = platform.get_devices()[device_number]
        except Exception: # if failed, find out what's going wrong?
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed. An unknown API name still ends up
            # here, because `api` is then unbound.
            diagnose()

            return 1

        # Create the reikna Thread on the selected device
        self.thr = api.Thread(device)

        # The device's warp size is stored as `wavefront` and handed to the
        # kernel templates below as the string render keyword LL.
        self.wavefront = api.DeviceParameters(device).warp_size
        print(api.DeviceParameters(device).max_work_group_size)

        from src.re_subroutine import cMultiplyScalar, cCopy, cAddScalar,cAddVec, cSparseMatVec, cSelect, cMultiplyVec, cMultiplyVecInplace, cMultiplyConjVec, cDiff, cSqrt, cAnisoShrink
        # Concatenate all kernel sources and compile them as one program
        prg = self.thr.compile(
                                cMultiplyScalar.R +
                                cCopy.R +
                                cAddScalar.R +
                                cSelect.R +cMultiplyConjVec.R + cAddVec.R+
                                cMultiplyVecInplace.R +cSparseMatVec.R+cDiff.R+ cSqrt.R+ cAnisoShrink.R+ cMultiplyVec.R,
                                render_kwds=dict(
                                    LL =  str(self.wavefront)), fast_math=False)

        # Expose each compiled kernel as an attribute
        self.cMultiplyScalar = prg.cMultiplyScalar
        self.cCopy = prg.cCopy
        self.cAddScalar = prg.cAddScalar
        self.cAddVec = prg.cAddVec
        self.cSparseMatVec = prg.cSparseMatVec
        self.cSelect = prg.cSelect
        self.cMultiplyVecInplace = prg.cMultiplyVecInplace
        self.cMultiplyVec = prg.cMultiplyVec
        self.cMultiplyConjVec = prg.cMultiplyConjVec
        self.cDiff = prg.cDiff
        self.cSqrt= prg.cSqrt
        self.cAnisoShrink = prg.cAnisoShrink

        # Zero-initialised device work buffers
        self.k_Kd = self.thr.to_device(numpy.zeros(self.st['Kd'], dtype=dtype, order="C"))
        self.k_Kd2 = self.thr.to_device(numpy.zeros(self.st['Kd'], dtype=dtype, order="C"))
        self.y =self.thr.to_device( numpy.zeros((self.st['M'],), dtype=dtype, order="C"))
        self.x_Nd = self.thr.to_device(numpy.zeros(self.st['Nd'], dtype=dtype, order="C"))

        # Index arrays from preindex_copy, kept on host and on device
        self.NdCPUorder, self.KdCPUorder, self.nelem =     preindex_copy(self.st['Nd'], self.st['Kd'])
        self.NdGPUorder = self.thr.to_device( self.NdCPUorder)
        self.KdGPUorder =  self.thr.to_device( self.KdCPUorder)
        self.Ndprod = numpy.int32(numpy.prod(self.st['Nd']))
        self.Kdprod = numpy.int32(numpy.prod(self.st['Kd']))
        self.M = numpy.int32( self.st['M'])

        self.SnGPUArray = self.thr.to_device(  self.sn)

        # Upload the sparse matrices (data / indices / indptr) and drop the
        # host copies.
        self.sp_data = self.thr.to_device( self.sp.data.astype(dtype))
        self.sp_indices =self.thr.to_device( self.sp.indices.astype(numpy.int32))
        self.sp_indptr = self.thr.to_device( self.sp.indptr.astype(numpy.int32))
        self.sp_numrow =  self.M
        del self.sp
        # NOTE(review): unlike `sp`, the index arrays of `spH` and `spHsp`
        # are uploaded without the int32 cast -- confirm their dtype matches
        # what the kernels expect.
        self.spH_data = self.thr.to_device(  self.spH.data.astype(dtype))
        self.spH_indices = self.thr.to_device(  self.spH.indices)
        self.spH_indptr = self.thr.to_device(  self.spH.indptr)
        self.spH_numrow = self.Kdprod
        del self.spH
        self.spHsp_data = self.thr.to_device(  self.spHsp.data.astype(dtype))
        self.spHsp_indices = self.thr.to_device( self.spHsp.indices)
        self.spHsp_indptr =self.thr.to_device(  self.spHsp.indptr)
        self.spHsp_numrow = self.Kdprod
        del self.spHsp

        import reikna.fft
        # FFT over all `ndims` axes of the k_Kd buffer
        self.fft = reikna.fft.FFT(self.k_Kd, numpy.arange(0, self.ndims)).compile(self.thr, fast_math=False)
        self.zero_scalar=dtype(0.0+0.0j)

Exemple #32

0

Afficher le fichier

Fichier : grid_test.py Projet : fjarri/thesis

def calculate_ramsey(
    wigner=False,
    t=1.0,
    steps=20000,
    samples=100,
    N=55000,
    trajectories=1,
    shape=(8, 8, 64),
    losses=True,
    linear_losses=None,
    box_modifiers=None,
):
    """
    Apply a pi/2 beam-splitter pulse and integrate the system up to ``t``.

    The ground state is loaded from a pickle file whose name is built from
    ``shape`` and ``box_modifiers``.

    :param wigner: if True, the state is converted with
        ``to_wigner_coherent`` and the integrator runs in Wigner mode.
    :param t: integration time; for ``t <= 0`` no integration is performed.
    :param steps: number of integration steps.
    :param samples: number of samples taken during integration.
    :param N: not used in the calculation; echoed in the returned dict.
    :param trajectories: number of trajectories.
    :param shape: grid shape (also part of the ground-state file name).
    :param losses: if truthy (and ``linear_losses`` is None), use the
        built-in loss terms.
    :param linear_losses: if not None, this coefficient is used for the
        (1, 0) and (0, 1) loss terms instead of the built-in ones.
    :param box_modifiers: file-name suffix components; (1, 1, 1) when None.
    :returns: dict with keys ``result``, ``weak_errors``, ``psi_type``,
        ``N``, ``steps``, ``shape``, ``box`` and ``wigner``. ``result`` and
        ``weak_errors`` are None when ``t <= 0``.
    """

    api = cluda.ocl_api()
    thr = api.Thread.create()

    f_detuning = 37
    f_rabi = 350
    state_dtype = numpy.complex128

    rng = numpy.random.RandomState(1234)

    freqs = (97.0, 97.0 * 1.03, 11.69)
    components = [const.rb87_1_minus1, const.rb87_2_1]

    scattering = const.scattering_3d(numpy.array([[100.4, 98.0], [98.0, 95.44]]), components[0].m)

    if linear_losses is not None:
        losses = [(linear_losses, (1, 0)), (linear_losses, (0, 1))]
    elif losses:
        losses = [(5.4e-42 / 6, (3, 0)), (8.1e-20 / 4, (0, 2)), (1.51e-20 / 2, (1, 1))]
    else:
        losses = None

    potential = HarmonicPotential(freqs)
    system = System(components, scattering, potential=potential, losses=losses)

    # Initial state
    if box_modifiers is None:
        box_modifiers = (1, 1, 1)
    with open(
        "ground_states/ground_state_"
        + "-".join([str(s) for s in shape])
        + "_"
        + "-".join([str(b) for b in box_modifiers])
        + ".pickle",
        "rb",
    ) as f:
        data = pickle.load(f)

    psi_gs = data["data"]
    box = data["box"]
    grid = UniformGrid(shape, box)
    print(grid.shape, box_modifiers, grid.box)

    psi = WavefunctionSet(thr, state_dtype, grid, components=2)

    # Two-component state
    psi.fill_with(psi_gs)

    # Initial noise
    if wigner:
        psi = psi.to_wigner_coherent(trajectories, seed=rng.randint(0, 2 ** 32 - 1))

    bs = BeamSplitter(psi, f_detuning=f_detuning, f_rabi=f_rabi)

    integrator = Integrator(
        psi,
        system,
        trajectories=trajectories,
        stepper_cls=RK46NLStepper,
        wigner=wigner,
        seed=rng.randint(0, 2 ** 32 - 1),
    )

    # Integrate
    n_bs_sampler = PopulationSampler(psi, beam_splitter=bs, theta=numpy.pi / 2)
    n_sampler = PopulationSampler(psi)
    i_sampler = InteractionSampler(psi)
    v_sampler = VisibilitySampler(psi)

    samplers = dict(N_bs=n_bs_sampler, N=n_sampler, V=v_sampler, I=i_sampler)
    weak_convergence = ["N", "V", "N_bs", "I"]

    bs(psi.data, 0, numpy.pi / 2)

    if t > 0:
        result, info = integrator.fixed_step(
            psi, 0, t, steps, samples=samples, samplers=samplers, weak_convergence=weak_convergence
        )
        weak_errors = info.weak_errors
    else:
        # Nothing was integrated: report "no data" explicitly instead of
        # failing on an unbound `result` / `None.weak_errors` below.
        result = None
        weak_errors = None

    psi_type = "wigner" if wigner else "wavefunction"

    return dict(
        result=result,
        weak_errors=weak_errors,
        psi_type=psi_type,
        N=N,
        steps=steps,
        shape=grid.shape,
        box=grid.box,
        wigner=wigner,
    )

Exemple #33

0

Afficher le fichier

Fichier : test_2d_universe.py Projet : fjarri-attic/vienna_simulation

def test_2d_universe():
    """Integrate a noisy two-component 2D state and check the reported results.

    Builds a uniform two-component state with opposite signs, adds
    pseudo-random complex noise (fixed seed) through an inverse FFT over the
    spatial axes, runs the integrator on an OpenCL thread and asserts that
    the reported errors and the relative drift of the total population stay
    below fixed thresholds. Finally plots the last density sample.
    """
    dtype = numpy.complex128

    # Simulation parameters
    modes = 128
    L_trap = 80.
    gamma = 0.1
    nu = 0.1
    t = 10.
    steps = 5000
    samples = 10
    ensembles = 1
    N = L_trap ** 2 / gamma

    # Grid geometry: (components, ensembles, x, y)
    problem_shape = (modes, modes)
    shape = (2, ensembles) + problem_shape
    box = (L_trap, L_trap)
    dV = (L_trap / modes) ** 2

    # Default OpenCL thread; a specific device could be picked instead via
    # api.Thread(api.get_platforms()[0].get_devices()[1]).
    thr = ocl_api().Thread.create()

    nonlinear_module = nonlinear_no_potential(
        dtype,
        numpy.array([[gamma, 0], [0, gamma]]),
        [(1, nu), (0, nu)])

    psi = numpy.empty(shape, dtype)

    integrator = Integrator(
        thr, psi, box, t, steps, samples,
        kinetic_coeff=0.5,
        nonlinear_module=nonlinear_module)

    # Classical ground state, with opposite phases of the two components
    psi[...] = (1. / gamma) ** 0.5
    psi[1] *= -1

    # To Wigner: add complex Gaussian noise generated in k-space
    rng = numpy.random.RandomState(seed=123)
    noise_re, noise_im = rng.normal(size=(2,) + shape, scale=numpy.sqrt(0.5))
    noise_kspace = numpy.sqrt(0.5) * (noise_re + 1j * noise_im)

    spatial_axes = range(2, len(shape))
    fft_scale = numpy.sqrt(dV / product(problem_shape))
    psi += numpy.fft.ifftn(noise_kspace, axes=spatial_axes) / fft_scale

    results = integrator(thr.to_device(psi), [CollectorWigner2D(dV)])

    print("Errors:", results.errors)
    for error_name in ('density', 'psi_strong_mean', 'psi_strong_max'):
        assert results.errors[error_name] < 1e-7

    # Check that the population stayed constant.
    # N is not used as the reference, since the initial value can differ
    # slightly (due to initial sampling).
    N_total = results.values['Nplus_mean'] + results.values['Nminus_mean']
    N_initial = N_total[0]
    N_diff = (N_total - N_initial) / N_initial
    assert numpy.abs(N_diff).max() < 1e-6

    plot_2d_universe(results.values['density'][-1], L_trap, N)

Exemple #34

0

Afficher le fichier

Fichier : AOFFT.py Projet : MyersRichard/soapy

    def __init__(self, inputSize, axes=(-1,), mode="pyfftw", dtype="complex64",
                 direction="FORWARD",
                 fftw_FLAGS=("FFTW_MEASURE", "FFTW_DESTROY_INPUT"),
                 THREADS=None, loggingLevel=None):
        """Select an FFT backend and allocate its buffers and plan.

        The requested ``mode`` is tried first; when it is unavailable the
        selection falls back in the order gpu_ocl -> gpu_cuda -> pyfftw ->
        scipy. The backend actually chosen is stored in ``self.FFTMODE``
        ("gpu", "pyfftw" or "scipy"), or ``None`` if none could be selected.

        Args:
            inputSize: Shape of the arrays to be transformed.
            axes: Axes over which the transform is taken.
            mode: "gpu" (treated as "gpu_ocl"), "gpu_ocl", "gpu_cuda",
                "pyfftw" or "scipy".
            dtype: dtype of the input/output buffers.
            direction: "FORWARD" or "BACKWARD".
            fftw_FLAGS: Flags passed to ``pyfftw.FFTW``.
            THREADS: Thread count for pyfftw; ``cpu_count()`` when ``None``.
            loggingLevel: If truthy, passed to ``logger.setLoggingLevel``.
        """
        self.axes = axes
        self.direction = direction
        # Stays None when no backend can be selected, so the backend checks
        # below are safe instead of raising AttributeError.
        self.FFTMODE = None

        if loggingLevel:
            logger.setLoggingLevel(loggingLevel)

        if mode in ("gpu", "gpu_ocl", "gpu_cuda"):
            if mode == "gpu":
                mode = "gpu_ocl"
            if REIKNA_AVAILABLE:
                if mode == "gpu_ocl":
                    try:
                        reikna_api = cluda.ocl_api()
                        self.reikna_thread = reikna_api.Thread.create()
                        self.FFTMODE = "gpu"
                    except Exception:
                        logger.warning(
                            "no reikna opencl available. will try cuda")
                        mode = "gpu_cuda"
                if mode == "gpu_cuda":
                    try:
                        reikna_api = cluda.cuda_api()
                        self.reikna_thread = reikna_api.Thread.create()
                        self.FFTMODE = "gpu"
                    except Exception:
                        logger.warning(
                            "no cuda available. Switching to pyfftw")
                        mode = "pyfftw"
            else:
                logger.warning(
                    "No gpu algorithms available. Switching to pyfftw")
                mode = "pyfftw"

        if mode == "pyfftw":
            if PYFFTW_AVAILABLE:
                self.FFTMODE = "pyfftw"
            else:
                logger.warning(
                    "No pyfftw available. Defaulting to scipy.fftpack")
                mode = "scipy"

        if mode == "scipy":
            if SCIPY_AVAILABLE:
                self.FFTMODE = "scipy"
            else:
                logger.warning("No scipy available - fft won't function.")

        if self.FFTMODE == "gpu":
            # NOTE(review): FORWARD maps to inverse=1 here; confirm this
            # matches how self.inverse is consumed when the plan is called.
            if direction == "FORWARD":
                self.inverse = 1
            elif direction == "BACKWARD":
                self.inverse = 0

            self.inputData = numpy.zeros(inputSize, dtype=dtype)
            inputData_dev = self.reikna_thread.to_device(self.inputData)
            self.outputData_dev = self.reikna_thread.array(inputSize,
                                                           dtype=dtype)

            logger.info("Generating and compiling reikna gpu fft plan...")
            reikna_ft = reikna.fft.FFT(inputData_dev, axes=axes)
            self.reikna_ft_c = reikna_ft.compile(self.reikna_thread)
            logger.info("Done!")

        if self.FFTMODE == "pyfftw":
            if THREADS is None:
                THREADS = cpu_count()

            # fftw_FLAGS set the optimisation level of fftw3
            # (more optimisation takes longer - but gives quicker ffts).
            # Can be FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE
            n = pyfftw.simd_alignment

            self.inputData = pyfftw.n_byte_align_empty(inputSize, n, dtype)
            self.inputData[:] = numpy.zeros(inputSize, dtype=dtype)
            self.outputData = pyfftw.n_byte_align_empty(inputSize, n, dtype)
            self.outputData[:] = numpy.zeros(inputSize, dtype=dtype)

            logger.info("Generating fftw3 plan....\nIf this takes too long, change fftw_FLAGS.")
            logger.debug("currently set to: {}".format(fftw_FLAGS))
            if direction == "FORWARD":
                self.fftwPlan = pyfftw.FFTW(self.inputData, self.outputData,
                                            axes=axes, threads=THREADS,
                                            flags=fftw_FLAGS)
            elif direction == "BACKWARD":
                self.fftwPlan = pyfftw.FFTW(self.inputData, self.outputData,
                                            direction='FFTW_BACKWARD',
                                            axes=axes, threads=THREADS,
                                            flags=fftw_FLAGS)
            logger.info("Done!")

        elif self.FFTMODE == "scipy":
            self.inputData = numpy.zeros(inputSize, dtype=dtype)
            # Length of the input along each transformed axis
            self.size = [inputSize[axis] for axis in self.axes]

Exemple #35

0

Afficher le fichier

    def plot_it(self, controller):
        """Process the loaded photon-counting image and plot the results.

        Reads the file named by ``controller.filename`` (``.sdt`` or
        ``.npy``), optionally bins and gates the per-pixel histograms,
        optionally subtracts a noise histogram, and then estimates a per-pixel
        peak position either by fitting (``fit_exp_mod_gauss``) or by
        cross-correlating against an instrument response function (IRF) on an
        OpenCL device. The resulting map is scaled and shown as a counts map,
        a heatmap and a 3D surface.

        Args:
            controller: GUI controller object. This method reads its
                ``filename`` and the ``bin_toggle``, ``bin_factor``,
                ``start_gate``, ``end_gate``, ``SBR_var``, ``IRF_var``,
                ``fit_var``, ``cut_off_max``, ``remove_empty_pix``,
                ``aspect_togg`` and ``scene_size`` variables, and stores
                ``noise_file`` / ``IRF_file`` on it when the user picks them.

        Returns:
            None. Problems are reported to the user through message boxes.
        """
        if controller.filename == '':
            messagebox.showerror('Error', 'No sdt file loaded!')
        else:
            #Checks if .sdt or .npy
            if controller.filename[-3:] == 'sdt':
                #Takes main file with all pixels
                sdt_file = SdtFile(controller.filename)
                #Pulls the TAC parameters from the sdt file. For some reason the 'times' array from the sdt file is not the correct times - poss software not updated for new card.
                adc_re = sdt_file.measure_info[0].adc_re
                tac_r = sdt_file.measure_info[0].tac_r
                tac_g = sdt_file.measure_info[0].tac_g
                image_data = sdt_file.data[0]
                image_size_x = image_data.shape[0]
                image_size_y = image_data.shape[1]
            elif controller.filename[-3:] == 'npy':
                #NOTE: allow_pickle=True can execute arbitrary code when the
                #file is untrusted; only load .npy files you produced yourself.
                temp_data = np.load(controller.filename, allow_pickle=True)
                image_size_x = temp_data.shape[0]
                image_size_y = temp_data.shape[1]
                image_data = np.ndarray((image_size_x, image_size_y),
                                        dtype='object')
                #Axis 0 has image_size_x entries and axis 1 has image_size_y,
                #so iterate them in that order (matters for non-square data).
                for i in range(image_size_x):
                    for j in range(image_size_y):
                        image_data[i][j] = temp_data[i][j][0]
                #Need to update to pull TAC parameters from .set file, or bundle them with the numpy data
                adc_re = 4096
                tac_r = 2.5016787e-8
                tac_g = 15
            else:
                messagebox.showerror('Error', 'Invalid filetype!')
                #Nothing was loaded, so there is nothing to process.
                return

            #Computes the dt/times arrays
            dt = tac_r / tac_g / adc_re
            times = np.arange(0, int(adc_re)) * dt
            #Binning
            if controller.bin_toggle.get() == 1:
                bin_factor = int(controller.bin_factor.get())
                image_data = image_data.reshape(*image_data.shape[:2], -1,
                                                bin_factor).sum(axis=-1)
                dt = dt * bin_factor
                times = np.arange(0, int(adc_re / bin_factor)) * dt

            #sets end point to end if gating not required
            if controller.end_gate.get() == '0':
                end_point = len(image_data[0][0])
                start_point = 0
            else:
                #converts back from ps to bins
                start_point = int(
                    (int(controller.start_gate.get()) * 1e-12) / dt)
                end_point = int(
                    (int(controller.end_gate.get()) * 1e-12) / dt)
                image_data = image_data[:, :, start_point:end_point]
                times = times[start_point:end_point]
            number_bins = end_point - start_point

            #If removing the BR/Noise this takes a separate sdt file
            if controller.SBR_var.get() == 1:
                if controller.end_gate.get() != '0':
                    messagebox.showerror(
                        'Error', 'Cannot use noise removal with gating')
                    controller.SBR_var.set('0')
                else:
                    controller.noise_file = askopenfilename(
                        title="Choose a noise file")
                    noise_file_sdt = SdtFile(controller.noise_file)
                    noise_data = noise_file_sdt.data[0][0][
                        start_point:end_point]
                    #add binning here if we use this

            #Processes the max counts - also finds brightest pixel for IRF if desired
            #Also notes any pixels with no real counts in it and notes the IDs for post-processing
            #NOTE(review): the loops below index axis 0 with range(image_size_y)
            #and axis 1 with range(image_size_x); this is only consistent for
            #square images - confirm before using non-square data.
            max_arr = np.zeros((image_size_y, image_size_x))
            max_count_all = 0
            pixel = (0, 0)
            subtract_noise = controller.SBR_var.get() == 1
            for i in range(image_size_y):
                for j in range(image_size_x):
                    if subtract_noise:
                        image_data[i][j] = image_data[i][j] - noise_data
                    max_count = np.amax(image_data[i][j])
                    max_arr[i][j] = max_count
                    if max_count > max_count_all:
                        max_count_all = max_count
                        pixel = (i, j)
            empty_pixels = np.transpose(np.nonzero(max_arr < 2))
            #plots the brightest and fits it to see where we're at
            centre, scale = fit_exp_mod_gauss(times,
                                              image_data[pixel[0]][pixel[1]],
                                              dt,
                                              plotting=True)
            #Checks if you're happy with gating?
            MsgBox = tk.messagebox.askquestion(
                'Proceed?', 'Are you happy with the gating?', icon='warning')
            if MsgBox == 'yes':
                #Takes brightest pixel as IRF or takes external sdt file
                if controller.IRF_var.get() == 1:
                    IRF_pix = image_data[pixel[0]][pixel[1]]
                elif controller.fit_var.get() == 1:
                    #Fitting does not need an IRF
                    pass
                else:
                    controller.IRF_file = askopenfilename(
                        title="Choose IRF sdt file")
                    IRF_file_sdt = SdtFile(controller.IRF_file)
                    max_irf = np.argmax(IRF_file_sdt.data[0][0])
                    half_gate = ((int(controller.end_gate.get()) -
                                  int(controller.start_gate.get())) / 2)
                    start_irf = int(max_irf - half_gate)
                    stop_irf = int(max_irf + half_gate)
                    if controller.bin_toggle.get() == 1:
                        IRF_pix = IRF_file_sdt.data[0][0]
                        IRF_pix = IRF_pix.reshape(int(adc_re / bin_factor),
                                                  -1).sum(axis=1)
                        IRF_pix = IRF_pix[start_irf:stop_irf]
                    else:
                        IRF_pix = IRF_file_sdt.data[0][0][start_irf:stop_irf]
                    #Add binning here too
                    plt.plot(IRF_pix)
                    plt.show()

                #prepare an empty 2d arr
                img_arr = np.zeros((image_size_y, image_size_x))

                #Start the timer here so that the "Time to process" report
                #below works for the fitting path as well.
                t0 = time.time()

                #either fit or X-corr
                if controller.fit_var.get() == 1:
                    for i in range(image_size_y):
                        for j in range(image_size_x):
                            try:
                                centre, scale = fit_exp_mod_gauss(
                                    times, image_data[i][j], dt)
                                img_arr[i, j] = centre
                            except (TypeError, RuntimeError):
                                #A failed fit leaves the pixel undefined
                                img_arr[i, j] = float('nan')
                else:
                    #set thread up
                    api = ocl_api()
                    device = api.get_platforms()[0].get_devices()[
                        2]  #Do in setting later
                    #print('Performing on {}'.format(device))
                    thread = api.Thread(device)
                    thread.device_params.local_mem_size = 32768  #Due to Apple bug - reported 01/21 by Bogdan
                    #Take IRF, FFT and conj it on CPU and then send to GPU (1)
                    IRF_pix = np.conj(np.fft.fft(IRF_pix))  #A complex128
                    #chop up the data into 64x64 chunks as at a adc_re of 4096 (max) this is the largest the AMD card can handle.
                    image_data = cubify(image_data, (64, 64, number_bins))
                    IRF_arr = np.full(image_data[0].shape, IRF_pix)
                    out_arr = np.zeros(
                        (image_data.shape[0], image_data.shape[1],
                         image_data.shape[2]))  #just for max vals

                    #Restart the timer so device setup is not counted
                    t0 = time.time()
                    image_data = image_data.astype('float64')
                    IRF_dev = thread.to_device(IRF_arr)  #send to GPU
                    res_dev = thread.array(image_data[0].shape, np.complex128)
                    planC = XCorr2d(image_data[0], IRF_arr).compile(thread)
                    for i in range(image_data.shape[0]):
                        data_dev = thread.to_device(image_data[i])
                        planC(res_dev, data_dev, IRF_dev)
                        result = np.roll(res_dev.get(),
                                         int(number_bins / 2),
                                         axis=2)
                        out_arr[i] = np.argmax(result, axis=2)
                    #put it back into a big image
                    img_arr = unblockshaped(out_arr, image_size_x,
                                            image_size_y)
                #Scale and convert to a distance (original note says mm).
                #NOTE(review): dt * 3e8 * 0.5 is metres, so the 1e6 factor
                #suggests micrometres - confirm the intended unit.
                img_arr = -img_arr - np.nanmean(-img_arr)
                img_arr = img_arr * dt * 1e6 * 3e8 * 0.5
                #Bit of data manipulation for any pixels that are huge
                if controller.cut_off_max.get() != '0':
                    super_threshold_indices = img_arr > float(
                        controller.cut_off_max.get())
                    img_arr[super_threshold_indices] = np.mean(img_arr)

                #If pixels have no counts (or just noise) then leave them out.
                if controller.remove_empty_pix.get() == 1:
                    for i in empty_pixels:
                        img_arr[i[0], i[1]] = np.nan

                ####PLOTS####
                #3d#
                # fig1=plt.figure()
                # f1_ax=fig1.gca(projection='3d')
                # x_data=np.arange(image_size_x)
                # y_data=np.arange(image_size_y)
                # x_data, y_data=np.meshgrid(x_data, y_data)
                # surf=f1_ax.plot_surface(x_data, y_data, img_arr, cmap=cm.jet)
                # fig1.colorbar(surf, shrink=0.5, aspect=5)
                # fig1.suptitle('3D')

                #2d#
                # fig2=plt.figure()
                # flat_plot=plt.imshow(img_arr, cmap=cm.jet)
                # fig2.colorbar(flat_plot, shrink=0.5, aspect=5)
                # fig2.suptitle('2D')

                #counts#
                tfin = time.time() - t0
                print('Time to process: {} seconds'.format(tfin))
                fig3 = plt.figure()
                cnt_map = plt.imshow(max_arr, cmap=cm.jet, origin='lower')
                fig3.colorbar(cnt_map, shrink=0.5, aspect=5)
                print(np.mean(max_arr))
                #fig3.suptitle('Counts Map')

                fig5 = go.Figure(data=[
                    go.Heatmap(z=img_arr,
                               colorscale='Jet',
                               colorbar=dict(thickness=80,
                                             ticklen=3,
                                             tickcolor='black',
                                             tickfont=dict(size=36,
                                                           color='black')))
                ])
                fig5.update_layout(width=1500,
                                   height=1500,
                                   font=dict(family="Arial",
                                             size=36,
                                             color="black"))
                fig5.show()

                if controller.aspect_togg.get() == 1:
                    if controller.scene_size.get() == '':
                        messagebox.showerror(
                            'Error', 'You have not entered a scene size!')
                        aspect_dict = dict(x=1, y=1, z=1)
                    else:
                        scene_size = int(controller.scene_size.get())  #mm
                        aspect_dict = dict(x=1,
                                           y=1,
                                           z=np.nanmax(img_arr) /
                                           (scene_size * 1e3))
                else:
                    aspect_dict = dict(x=1, y=1, z=1)

                fig6 = go.Figure(data=[
                    go.Surface(z=img_arr,
                               colorscale='Jet',
                               colorbar=dict(thickness=40,
                                             ticklen=3,
                                             tickcolor='black',
                                             tickfont=dict(size=9,
                                                           color='black')))
                ])
                fig6.update_layout(font=dict(family="Arial",
                                             size=9,
                                             color="black"),
                                   scene=dict(aspectmode='manual',
                                              aspectratio=aspect_dict))
                fig6.show()

                #plots one histogram from a bright pixel to check for gating errors etc and check fit
                #fig4=plt.figure()
                #plt.plot(times[start_point:end_point], sdt_file.data[0][pixel[0]][pixel[1]][start_point:end_point],'o')
                #fig4.suptitle('Brightest histogram')

                plt.show()

Exemple #36

0

Afficher le fichier

 def __init__(self): 
     """Store the OpenCL API object, then run the parent initialiser."""
     # self.api is set before the parent __init__ runs — presumably the
     # parent reads it during initialisation; TODO confirm against the base class.
     self.api = cluda.ocl_api()  
     super(OpenClContext, self).__init__()