def gs_mod_gpu(idata, itera=10, osize=256):
    """Modified Gerchberg-Saxton iteration on the GPU with random-phase padding

    Before iterating, unit-amplitude values with a uniformly random phase are
    added to ``idata`` everywhere outside a central ``osize`` x ``osize``
    window. The loop then alternates forward and inverse FFTs, applying the
    ``norm`` kernel after each forward transform and the ``norm1`` kernel
    after each inverse transform. Both kernels come from the module level
    ``KERNEL`` template.

    **ARGUMENTS:**

        ========== ======================================================
        idata      numpy array containing the target distribution
                   (indexed as a 2-D array when building the mask)
        itera      Number of iterations to run
        osize      Side length of the central window that receives no
                   random-phase padding
        ========== ======================================================

    **RETURNS:**

        Complex numpy array with the shape of ``idata`` and unit modulus,
        ``exp(1j * angle(F))``, where ``F`` is the last forward transform.
    """
    half_window = osize // 2

    platform = cl.get_platforms()[0]
    devices = platform.get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[devices[0]])
    queue = cl.CommandQueue(ctx)

    # Translated original note: 'does not work with "complex128"'.
    # NOTE(review): the plan is nevertheless created with complex128 - confirm
    # whether the remark is stale.
    plan = Plan(idata.shape, queue=queue, dtype=complex128)

    src = str(
        Template(KERNEL).render(
            double_support=all(has_double_support(dev) for dev in devices),
            amd_double_support=all(
                has_amd_double_support(dev) for dev in devices),
        ))
    prg = cl.Program(ctx, src).build()

    # Random-phase padding, zeroed inside the central window. The window is
    # centred on the actual array (the previous hard-coded centre of 512 was
    # only right for 1024x1024 input) and its lower bound is clamped so an
    # oversized window cannot wrap around through negative indices.
    mask = exp(2.j * pi * random(idata.shape))
    row_mid = idata.shape[0] // 2
    col_mid = idata.shape[1] // 2
    mask[max(row_mid - half_window, 0):row_mid + half_window,
         max(col_mid - half_window, 0):col_mid + half_window] = 0

    # The earlier upload/FFT of the unpadded target was removed: every buffer
    # it produced was overwritten before being read.
    idata_gpu = cl_array.to_device(
        queue, ifftshift(idata + mask).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    # Same (queue, array) call form as the other transfers in this function.
    error_gpu = cl_array.to_device(
        queue, zeros(idata_gpu.shape).astype("double"))

    plan.execute(idata_gpu.data, fdata_gpu.data)

    # Fetch the kernels and declare the scalar argument type once, not on
    # every iteration.
    norm_kernel = prg.norm
    norm1_kernel = prg.norm1
    norm1_kernel.set_scalar_arg_dtypes([None, None, None, int32])

    for _ in range(itera):
        norm_kernel(queue, fdata_gpu.shape, None, fdata_gpu.data)
        plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)
        # norm1 still receives error_gpu as its third buffer. The host-side
        # reduction of that buffer was dropped because its value was never
        # used (the convergence check had been disabled), yet it forced a
        # blocking device-to-host read on each pass.
        norm1_kernel(queue, rdata_gpu.shape, None, rdata_gpu.data,
                     idata_gpu.data, error_gpu.data, int32(half_window))
        plan.execute(rdata_gpu.data, fdata_gpu.data)

    fdata = ifftshift(fdata_gpu.get())
    # Keep only the phase of the final transform.
    return exp(1.j * angle(fdata))
def gs_mod_gpu(idata, itera=10, osize=256):
    """Modified Gerchberg-Saxton iteration on the GPU with random-phase padding

    Before iterating, unit-amplitude values with a uniformly random phase are
    added to ``idata`` everywhere outside a central ``osize`` x ``osize``
    window. The loop then alternates forward and inverse FFTs, applying the
    ``norm`` kernel after each forward transform and the ``norm1`` kernel
    after each inverse transform. Both kernels come from the module level
    ``KERNEL`` template.

    **ARGUMENTS:**

        ========== ======================================================
        idata      numpy array containing the target distribution
                   (indexed as a 2-D array when building the mask)
        itera      Number of iterations to run
        osize      Side length of the central window that receives no
                   random-phase padding
        ========== ======================================================

    **RETURNS:**

        Complex numpy array with the shape of ``idata`` and unit modulus,
        ``exp(1j * angle(F))``, where ``F`` is the last forward transform.
    """
    half_window = osize // 2

    platform = cl.get_platforms()[0]
    devices = platform.get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[devices[0]])
    queue = cl.CommandQueue(ctx)

    # Translated original note: 'does not work with "complex128"'.
    # NOTE(review): the plan is nevertheless created with complex128 - confirm
    # whether the remark is stale.
    plan = Plan(idata.shape, queue=queue, dtype=complex128)

    src = str(
        Template(KERNEL).render(
            double_support=all(has_double_support(dev) for dev in devices),
            amd_double_support=all(
                has_amd_double_support(dev) for dev in devices),
        ))
    prg = cl.Program(ctx, src).build()

    # Random-phase padding, zeroed inside the central window. The window is
    # centred on the actual array (the previous hard-coded centre of 512 was
    # only right for 1024x1024 input) and its lower bound is clamped so an
    # oversized window cannot wrap around through negative indices.
    mask = exp(2.j * pi * random(idata.shape))
    row_mid = idata.shape[0] // 2
    col_mid = idata.shape[1] // 2
    mask[max(row_mid - half_window, 0):row_mid + half_window,
         max(col_mid - half_window, 0):col_mid + half_window] = 0

    # The earlier upload/FFT of the unpadded target was removed: every buffer
    # it produced was overwritten before being read.
    idata_gpu = cl_array.to_device(
        queue, ifftshift(idata + mask).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    # Same (queue, array) call form as the other transfers in this function.
    error_gpu = cl_array.to_device(
        queue, zeros(idata_gpu.shape).astype("double"))

    plan.execute(idata_gpu.data, fdata_gpu.data)

    # Fetch the kernels and declare the scalar argument type once, not on
    # every iteration.
    norm_kernel = prg.norm
    norm1_kernel = prg.norm1
    norm1_kernel.set_scalar_arg_dtypes([None, None, None, int32])

    for _ in range(itera):
        norm_kernel(queue, fdata_gpu.shape, None, fdata_gpu.data)
        plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)
        # norm1 still receives error_gpu as its third buffer. The host-side
        # reduction of that buffer was dropped because its value was never
        # used (the convergence check had been disabled), yet it forced a
        # blocking device-to-host read on each pass.
        norm1_kernel(queue, rdata_gpu.shape, None, rdata_gpu.data,
                     idata_gpu.data, error_gpu.data, int32(half_window))
        plan.execute(rdata_gpu.data, fdata_gpu.data)

    fdata = ifftshift(fdata_gpu.get())
    # Keep only the phase of the final transform.
    return exp(1.j * angle(fdata))
def gs_gpu(idata, itera=100):
    """Gerchberg-Saxton algorithm to calculate DOEs using the GPU

    Calculates the phase distribution in an object plane to obtain a
    specific amplitude distribution in the target plane. It uses a FFT
    to calculate the field propagation.
    The wavefront at the DOE plane is assumed as a plane wave.

    **ARGUMENTS:**

        ========== ======================================================
        idata      numpy array containing the target amplitude distribution
        itera      Maximum number of iterations
        ========== ======================================================

    **RETURNS:**

        Complex numpy array with the shape of ``idata`` and unit modulus,
        ``exp(1j * angle(F))``, where ``F`` is the content of the forward
        transform buffer when the loop ends.
    """
    platform = cl.get_platforms()[0]
    devices = platform.get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[devices[0]])
    queue = cl.CommandQueue(ctx)

    # Translated original note: 'does not work with "complex128"'.
    # NOTE(review): the plan is nevertheless created with complex128 - confirm
    # whether the remark is stale.
    plan = Plan(idata.shape, queue=queue, dtype=complex128)

    src = str(
        Template(KERNEL).render(
            double_support=all(has_double_support(dev) for dev in devices),
            amd_double_support=all(
                has_amd_double_support(dev) for dev in devices),
        ))
    prg = cl.Program(ctx, src).build()

    idata_gpu = cl_array.to_device(queue,
                                   ifftshift(idata).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    plan.execute(idata_gpu.data, fdata_gpu.data)

    # Fetch the kernels once instead of on every iteration.
    norm_kernel = prg.norm
    norm2_kernel = prg.norm2

    # Start from +inf so the first iteration can never trigger the stop
    # condition. The previous sentinel of 1000 ended the loop immediately
    # whenever the initial error happened to be larger than that.
    previous_error = float("inf")
    for _ in range(itera):
        norm_kernel(queue, fdata_gpu.shape, None, fdata_gpu.data)
        plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)

        # TODO: This calculation should be done in the GPU
        rdata = ifftshift(rdata_gpu.get())
        error = (abs(rdata) - idata).std()
        # Stop as soon as the error grows with respect to the previous pass;
        # fdata_gpu then still holds the previous forward transform.
        if error > previous_error:
            break
        previous_error = error

        norm2_kernel(queue, rdata_gpu.shape, None, rdata_gpu.data,
                     idata_gpu.data)
        plan.execute(rdata_gpu.data, fdata_gpu.data)

    fdata = ifftshift(fdata_gpu.get())
    # Keep only the phase of the final transform.
    return exp(1.j * angle(fdata))
def gs_gpu(idata, itera=100):
    """Gerchberg-Saxton algorithm to calculate DOEs using the GPU

    Calculates the phase distribution in an object plane to obtain a
    specific amplitude distribution in the target plane. It uses a FFT
    to calculate the field propagation.
    The wavefront at the DOE plane is assumed as a plane wave.

    **ARGUMENTS:**

        ========== ======================================================
        idata      numpy array containing the target amplitude distribution
        itera      Maximum number of iterations
        ========== ======================================================

    **RETURNS:**

        Complex numpy array with the shape of ``idata`` and unit modulus,
        ``exp(1j * angle(F))``, where ``F`` is the content of the forward
        transform buffer when the loop ends.
    """
    platform = cl.get_platforms()[0]
    devices = platform.get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[devices[0]])
    queue = cl.CommandQueue(ctx)

    # Translated original note: 'does not work with "complex128"'.
    # NOTE(review): the plan is nevertheless created with complex128 - confirm
    # whether the remark is stale.
    plan = Plan(idata.shape, queue=queue, dtype=complex128)

    src = str(
        Template(KERNEL).render(
            double_support=all(has_double_support(dev) for dev in devices),
            amd_double_support=all(
                has_amd_double_support(dev) for dev in devices),
        ))
    prg = cl.Program(ctx, src).build()

    idata_gpu = cl_array.to_device(queue,
                                   ifftshift(idata).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    plan.execute(idata_gpu.data, fdata_gpu.data)

    # Fetch the kernels once instead of on every iteration.
    norm_kernel = prg.norm
    norm2_kernel = prg.norm2

    # Start from +inf so the first iteration can never trigger the stop
    # condition. The previous sentinel of 1000 ended the loop immediately
    # whenever the initial error happened to be larger than that.
    previous_error = float("inf")
    for _ in range(itera):
        norm_kernel(queue, fdata_gpu.shape, None, fdata_gpu.data)
        plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)

        # TODO: This calculation should be done in the GPU
        rdata = ifftshift(rdata_gpu.get())
        error = (abs(rdata) - idata).std()
        # Stop as soon as the error grows with respect to the previous pass;
        # fdata_gpu then still holds the previous forward transform.
        if error > previous_error:
            break
        previous_error = error

        norm2_kernel(queue, rdata_gpu.shape, None, rdata_gpu.data,
                     idata_gpu.data)
        plan.execute(rdata_gpu.data, fdata_gpu.data)

    fdata = ifftshift(fdata_gpu.get())
    # Keep only the phase of the final transform.
    return exp(1.j * angle(fdata))