def get_gauss_2d(shape, sigma, pixel_size=1, fourier=False, queue=None, block=False):
    """Compute a 2D Gaussian of *shape* on the OpenCL device.

    *sigma* is the standard deviation and *pixel_size* the pixel spacing; both are
    expanded with ``make_tuple`` and stripped of units with ``get_magnitude``. When
    *fourier* is True the ``gauss_2d_f`` kernel is launched instead of ``gauss_2d``
    (the Fourier-space variant, convenient for convolution). The kernel runs on
    *queue*, falling back to ``cfg.OPENCL.queue``. If *block* is True, wait for the
    kernel to finish. Return the resulting pyopencl array.
    """
    shape = make_tuple(shape)
    pixel_size = get_magnitude(make_tuple(pixel_size))
    sigma = get_magnitude(make_tuple(sigma))
    LOG.debug('get_gauss_2d, shape: %s, sigma: %s, pixel size: %s, fourier: %s',
              shape, sigma, pixel_size, fourier)

    if queue is None:
        queue = cfg.OPENCL.queue

    out = cl.array.Array(queue, shape, dtype=cfg.PRECISION.np_float)

    # Both kernels take the same arguments, only the entry point differs.
    program = cfg.OPENCL.programs['improc']
    kernel = program.gauss_2d_f if fourier else program.gauss_2d

    # Host tuples are (y, x), the kernels get (x, y).
    cl_sigma = g_util.make_vfloat2(sigma[1], sigma[0])
    cl_pixel_size = g_util.make_vfloat2(pixel_size[1], pixel_size[0])
    ev = kernel(queue, shape[::-1], None, out.data, cl_sigma, cl_pixel_size)

    if block:
        ev.wait()

    return out
def project_metaballs_naive(metaballs, shape, pixel_size, offset=None, z_step=None,
                            queue=None, out=None, block=False):
    """Project a list of :class:`.MetaBall` on an image plane with *shape*, *pixel_size*.

    *z_step* is the physical step in the z-dimension, if not specified it is the same
    as the x component of *pixel_size*. *offset* is the physical spatial body offset as
    (y, x), (0, 0) m if not specified. Use OpenCL *queue* (``cfg.OPENCL.queue`` if not
    specified) and *out* pyopencl Array instance for returning the result (a new array
    is allocated if it is None). If *block* is True, wait for the kernel to finish.
    Return the array with the projection.
    """
    def get_extrema(sgn):
        """Return the z extremum over all metaballs, maximum for positive *sgn*,
        minimum otherwise.
        """
        func = np.max if sgn > 0 else np.min
        x_ps = util.make_tuple(pixel_size)[1]
        res = [(ball.position[2] + sgn * (2 * ball.radius + x_ps)).simplified.magnitude
               for ball in metaballs]

        return func(res)

    if offset is None:
        offset = (0, 0) * q.m
    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl_array.Array(queue, shape, cfg.PRECISION.np_float)

    # Serialized bodies are reinterpreted as a flat float32 array. np.frombuffer replaces
    # the deprecated binary mode of np.fromstring.
    string = b"".join([body.pack() for body in metaballs])
    data = np.frombuffer(string, dtype=np.float32)
    data = cl_array.to_device(queue, data)

    n, m = shape
    ps = util.make_tuple(pixel_size.simplified.magnitude)
    z_step = ps[1] if z_step is None else z_step.simplified.magnitude
    z_range = get_extrema(-1), get_extrema(1)
    # Host order is (y, x), the kernel expects (x, y)
    offset = g_util.make_vfloat2(*offset.simplified.magnitude[::-1])

    # Launch on the same queue which owns *out* and *data*, otherwise a user-supplied
    # queue would be silently ignored and the kernel could race with the upload.
    ev = cfg.OPENCL.programs["geometry"].naive_metaballs(
        queue,
        (m, n),
        None,
        out.data,
        data.data,
        np.int32(len(metaballs)),
        offset,
        g_util.make_vfloat2(*z_range),
        cfg.PRECISION.np_float(z_step),
        g_util.make_vfloat2(*ps[::-1]),
        np.int32(True),
    )
    if block:
        ev.wait()

    return out
def project_metaballs(metaballs, shape, pixel_size, offset=None, queue=None, out=None,
                      block=False):
    """Project a list of :class:`.MetaBall` on an image plane with *shape*, *pixel_size*.

    *offset* is the physical spatial body offset as (y, x), (0, 0) m if not specified.
    Use OpenCL *queue* (``cfg.OPENCL.queue`` if not specified) and *out* pyopencl Array
    instance for returning the result (a new array is allocated if it is None). If
    *block* is True, wait for the kernel to finish. Return the array with the
    projection.
    """
    string = b"".join([body.pack() for body in metaballs])
    n, m = shape
    ps = pixel_size.simplified.magnitude

    if offset is None:
        offset = (0, 0) * q.m
    if queue is None:
        queue = cfg.OPENCL.queue

    # Serialized bodies plus scratch buffers used by the kernel. The scratch sizes scale
    # with the number of pixels and cfg.MAX_META_BODIES.
    # NOTE(review): buffers are created in cfg.OPENCL.ctx, which presumably is also the
    # context of *queue* - confirm for user-supplied queues.
    mem_flags = cl.mem_flags
    bodies_mem = cl.Buffer(cfg.OPENCL.ctx, mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR,
                           hostbuf=string)
    pbodies_mem = cl.Buffer(cfg.OPENCL.ctx, mem_flags.READ_WRITE,
                            size=m * n * cfg.MAX_META_BODIES * 4 * 7)
    left_mem = cl.Buffer(cfg.OPENCL.ctx, mem_flags.READ_WRITE,
                         size=m * n * 2 * cfg.MAX_META_BODIES)
    right_mem = cl.Buffer(cfg.OPENCL.ctx, mem_flags.READ_WRITE,
                          size=m * n * 2 * cfg.MAX_META_BODIES)

    # Host order is (y, x), the kernel expects (x, y)
    offset = g_util.make_vfloat2(*offset.simplified.magnitude[::-1])
    if out is None:
        out = cl_array.Array(queue, shape, cfg.PRECISION.np_float)

    # Launch on the queue which owns *out* instead of always using the default one,
    # otherwise a user-supplied queue would be silently ignored.
    ev = cfg.OPENCL.programs["geometry"].metaballs(
        queue,
        (m, n),
        None,
        out.data,
        bodies_mem,
        pbodies_mem,
        left_mem,
        right_mem,
        np.int32(len(metaballs)),
        offset,
        cl_array.vec.make_int2(0, 0),
        cl_array.vec.make_int4(0, 0, m, n),
        g_util.make_vfloat2(ps[1], ps[0]),
        np.int32(True),
    )
    if block:
        ev.wait()

    return out
def _transfer_real(self, shape, center, pixel_size, energy, exponent, compute_phase,
                   is_parabola, out, queue, block, flux=1):
    """Fill *out* by the ``make_flat_from_2D_profile`` kernel.

    The flux obtained from :meth:`get_flux` for *energy* and *pixel_size* is rescaled to
    1 / s, uploaded as an OpenCL image and sampled by the kernel with linear filtering.
    The *flux* argument is not used, the value is always taken from :meth:`get_flux`.
    *center* is passed to the kernel as a 3-vector, *exponent*, *compute_phase* and
    *is_parabola* as integer flags. The kernel is enqueued on *queue*; if *block* is
    True, wait for it to finish. Nothing is returned.
    """
    np_float = cfg.PRECISION.np_float

    flux_profile = self.get_flux(energy, None, pixel_size).rescale(1 / q.s).magnitude
    cl_image = gutil.get_image(flux_profile.astype(np_float), queue=queue)
    sampler = cl.Sampler(cfg.OPENCL.ctx, False, cl.addressing_mode.CLAMP,
                         cl.filter_mode.LINEAR)

    cl_center = gutil.make_vfloat3(*center)
    # Pixel sizes are stored as (y, x), the kernel gets (x, y)
    cl_ps = gutil.make_vfloat2(*pixel_size.simplified.magnitude[::-1])
    cl_input_ps = gutil.make_vfloat2(*self._pixel_size.simplified.magnitude[::-1])
    z_sample = self.sample_distance.simplified.magnitude
    lam = energy_to_wavelength(energy).simplified.magnitude

    make_flat = cfg.OPENCL.programs["physics"].make_flat_from_2D_profile
    ev = make_flat(queue, shape[::-1], None,
                   out.data, cl_image, sampler,
                   cl_center, cl_ps, cl_input_ps,
                   np_float(z_sample), np_float(lam),
                   np.int32(exponent), np.int32(compute_phase), np.int32(is_parabola))

    if block:
        ev.wait()
def rescale(image, shape, sampler=None, queue=None, out=None, block=False):
    """Resample *image* to *shape* on the OpenCL device.

    *sampler* is a :class:`pyopencl.Sampler` instance; if it is not given, a
    clamp-to-edge sampler with linear filtering is created. The kernel runs on *queue*
    (``cfg.OPENCL.queue`` if None) and writes into *out*, which is allocated if None.
    If *block* is True, wait for the kernel to finish. Raise :class:`TypeError` in
    double precision mode. Return the rescaled array.
    """
    if cfg.PRECISION.cl_float == 8:
        raise TypeError("Double precision mode not supported")

    shape = make_tuple(shape)
    # OpenCL order, i.e. (x, y)
    x_factor = float(shape[1]) / image.shape[1]
    y_factor = float(shape[0]) / image.shape[0]
    factor = (x_factor, y_factor)
    LOG.debug("rescale, shape: %s, final_shape: %s, factor: %s", image.shape, shape, factor)

    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl.array.Array(queue, shape, dtype=cfg.PRECISION.np_float)
    if not sampler:
        sampler = cl.Sampler(cfg.OPENCL.ctx, False, cl.addressing_mode.CLAMP_TO_EDGE,
                             cl.filter_mode.LINEAR)

    # NOTE(review): the image is created without passing *queue* - confirm that
    # get_image's default is fine when a non-default queue is supplied.
    cl_image = g_util.get_image(image)

    kernel = cfg.OPENCL.programs["improc"].rescale
    ev = kernel(queue, shape[::-1], None, cl_image, out.data, sampler,
                g_util.make_vfloat2(x_factor, y_factor))
    if block:
        ev.wait()

    return out
def rescale(image, shape, sampler=None, queue=None, out=None, block=False):
    """Rescale *image* to *shape* by an OpenCL kernel.

    *sampler* is the :class:`pyopencl.Sampler` used for reading the source image, a
    linear, clamp-to-edge one is created when it is not supplied. *queue* defaults to
    ``cfg.OPENCL.queue`` and *out* to a freshly allocated pyopencl Array of *shape*.
    If *block* is True, wait for the kernel to finish. Double precision mode is not
    supported and raises :class:`TypeError`. Return the output array.
    """
    if cfg.PRECISION.cl_float == 8:
        raise TypeError('Double precision mode not supported')

    shape = make_tuple(shape)
    src_height, src_width = image.shape[0], image.shape[1]
    # Scaling factors in OpenCL order (x first, then y)
    factor = (float(shape[1]) / src_width, float(shape[0]) / src_height)
    LOG.debug('rescale, shape: %s, final_shape: %s, factor: %s', image.shape, shape, factor)

    queue = cfg.OPENCL.queue if queue is None else queue
    if out is None:
        out = cl.array.Array(queue, shape, dtype=cfg.PRECISION.np_float)
    if not sampler:
        sampler = cl.Sampler(cfg.OPENCL.ctx, False, cl.addressing_mode.CLAMP_TO_EDGE,
                             cl.filter_mode.LINEAR)

    image = g_util.get_image(image)
    cl_factor = g_util.make_vfloat2(*factor)
    ev = cfg.OPENCL.programs['improc'].rescale(queue, shape[::-1], None, image, out.data,
                                               sampler, cl_factor)
    if block:
        ev.wait()

    return out
def get_roots(self, coeffs, interval, previous_coeffs=None, next_coeffs=None):
    """Run ``roots_kernel`` for *coeffs* on *interval* and return the result.

    *previous_coeffs* and *next_coeffs* are replaced by ``poly_deg + 1`` NaNs when they
    are not given. The kernel runs as a single work item on ``cfg.OPENCL.queue`` and
    writes to ``self.roots_mem``, from which five values are copied back into a numpy
    array of the configured float precision.
    """
    def to_mem(values):
        """Upload *values*, substituting NaNs for missing coefficients."""
        if values is None:
            values = (self.poly_deg + 1) * [np.nan]
        return get_coeffs_mem(values)

    previous_coeffs_mem = to_mem(previous_coeffs)
    next_coeffs_mem = to_mem(next_coeffs)
    coeffs_mem = get_coeffs_mem(coeffs)
    cl_interval = g_util.make_vfloat2(interval[0], interval[1])

    self.prg.roots_kernel(cfg.OPENCL.queue, (1,), None,
                          self.roots_mem,
                          previous_coeffs_mem,
                          coeffs_mem,
                          next_coeffs_mem,
                          cl_interval,
                          cfg.PRECISION.np_float(self.pixel_size))

    res = np.empty(5, dtype=cfg.PRECISION.np_float)
    cl.enqueue_copy(cfg.OPENCL.queue, res, self.roots_mem)

    return res
def _transfer_real(self, shape, center, pixel_size, energy, exponent, compute_phase,
                   is_parabola, out, queue, block, flux=1):
    """Compute the actual wavefield by the ``make_flat_from_scalar`` kernel.

    *center* holds plain numbers which are passed to the kernel as a 3-vector.
    *pixel_size*, ``self.sample_distance`` and the wavelength derived from *energy* are
    simplified and stripped of units here before they are handed over to OpenCL. *flux*
    is a scalar passed in the configured float precision, *exponent*, *compute_phase*
    and *is_parabola* are passed as integer flags. The result is written to *out* on
    *queue*; if *block* is True, wait for the kernel to finish. Nothing is returned.
    """
    np_float = cfg.PRECISION.np_float

    cl_center = gutil.make_vfloat3(*center)
    # (y, x) on the host, (x, y) in the kernel
    cl_ps = gutil.make_vfloat2(*pixel_size.simplified.magnitude[::-1])
    z_sample = self.sample_distance.simplified.magnitude
    lam = energy_to_wavelength(energy).simplified.magnitude

    make_flat = cfg.OPENCL.programs['physics'].make_flat_from_scalar
    ev = make_flat(queue, shape[::-1], None,
                   out.data,
                   np_float(flux),
                   cl_center,
                   cl_ps,
                   np_float(z_sample),
                   np_float(lam),
                   np.int32(exponent),
                   np.int32(compute_phase),
                   np.int32(is_parabola))

    if block:
        ev.wait()
def _project(self, shape, pixel_size, offset, t=None, queue=None, out=None, block=False):
    """Projection implementation.

    Compute the projected thickness on an image plane with *shape* and *pixel_size*,
    shifted by physical *offset* given as (y, x). *t* is not used here. Use OpenCL
    *queue* (``cfg.OPENCL.queue`` if not specified) and *out* pyopencl Array for the
    result (a zero-filled one is allocated if it is None). The kernel is launched only
    if the object extrema overlap the field of view; if *block* is True, wait for it to
    finish. Return the output array.
    """
    def get_crop(index, fov):
        """Intersect object extrema with *fov* along axis *index* (0 is x, 1 is y) and
        make the result relative to the offset.
        """
        minimum = max(self.extrema[index][0], fov[index][0])
        maximum = min(self.extrema[index][1], fov[index][1])
        return minimum - offset[::-1][index], maximum - offset[::-1][index]

    def get_px_value(value, round_func, ps):
        """Convert physical *value* to pixels of size *ps*, rounded by *round_func*."""
        return int(round_func(get_magnitude(value / ps)))

    # Resolve the default queue before it is needed for allocation and kernel launch,
    # the same way the other GPU helpers do it.
    if queue is None:
        queue = cfg.OPENCL.queue

    # Move to the desired location, apply the T matrix and resort the triangles
    self.transform()
    self.sort()

    psm = pixel_size.simplified.magnitude
    fov = offset + shape * pixel_size
    # Rows are (start, end) of the field of view, row 0 for x and row 1 for y
    fov = np.concatenate((offset.simplified.magnitude[::-1],
                          fov.simplified.magnitude[::-1])).reshape(2, 2).transpose() * q.m
    if out is None:
        out = cl_array.zeros(queue, shape, dtype=cfg.PRECISION.np_float)

    if (self.extrema[0][0] < fov[0][1] and self.extrema[0][1] > fov[0][0] and
            self.extrema[1][0] < fov[1][1] and self.extrema[1][1] > fov[1][0]):
        # Object inside FOV
        x_min, x_max = get_crop(0, fov)
        y_min, y_max = get_crop(1, fov)
        x_min_px = get_px_value(x_min, np.floor, pixel_size[1])
        x_max_px = get_px_value(x_max, np.ceil, pixel_size[1])
        y_min_px = get_px_value(y_min, np.floor, pixel_size[0])
        y_max_px = get_px_value(y_max, np.ceil, pixel_size[0])
        width = min(x_max_px - x_min_px, shape[1])
        height = min(y_max_px - y_min_px, shape[0])
        compute_offset = cl_array.vec.make_int2(x_min_px, y_min_px)
        v_1, v_2, v_3 = self._make_inputs(queue, pixel_size)
        max_dx = self.max_triangle_x_diff.simplified.magnitude / psm[1]
        # Use the same pixel size as for the x-axis, which will work for objects "not too far"
        # from the imaging plane
        min_z = self.extrema[2][0].simplified.magnitude / psm[1]
        cl_offset = gutil.make_vfloat2(*(offset / pixel_size).simplified.magnitude[::-1])

        ev = cfg.OPENCL.programs['mesh'].compute_thickness(queue,
                                                           (width, height),
                                                           None,
                                                           v_1.data,
                                                           v_2.data,
                                                           v_3.data,
                                                           out.data,
                                                           np.int32(self.num_triangles),
                                                           np.int32(shape[1]),
                                                           compute_offset,
                                                           cl_offset,
                                                           cfg.PRECISION.np_float(psm[1]),
                                                           cfg.PRECISION.np_float(max_dx),
                                                           cfg.PRECISION.np_float(min_z),
                                                           np.int32(self.iterations))
        # There is an event to wait for only when the kernel was launched
        if block:
            ev.wait()

    return out
def _transfer_real(self, shape, center, pixel_size, energy, exponent, compute_phase,
                   is_parabola, out, queue, block):
    """Compute the flat field wavefield from a vertical intensity profile.

    A vertical profile with ``shape[0]`` samples is computed by
    :meth:`_create_vertical_profile` for *energy* and the observation angles given by
    the vertical field of view and ``self.sample_distance``, rescaled to 1 / s and
    uploaded via *queue*. The ``make_flat_from_vertical_profile`` kernel then writes the
    result to *out*. *exponent*, *compute_phase* and *is_parabola* are passed to the
    kernel as integer flags. If *block* is True, wait for the kernel to finish. Nothing
    is returned.
    """
    np_float = cfg.PRECISION.np_float

    cl_center = gutil.make_vfloat3(*center)
    # (y, x) on the host, (x, y) in the kernel
    cl_ps = gutil.make_vfloat2(*pixel_size.simplified.magnitude[::-1])

    # Vertical positions with respect to the y component of *center*
    y_positions = np.arange(0, shape[0]) * pixel_size[0] - center[1] * q.m
    angles = np.arctan((y_positions / self.sample_distance).simplified)
    host_profile = self._create_vertical_profile(energy, angles, pixel_size[0])
    host_profile = host_profile.rescale(1 / q.s).magnitude
    profile = cl_array.to_device(queue, host_profile.astype(np_float))

    z_sample = self.sample_distance.simplified.magnitude
    lam = energy_to_wavelength(energy).simplified.magnitude

    make_flat = cfg.OPENCL.programs["physics"].make_flat_from_vertical_profile
    ev = make_flat(queue, shape[::-1], None,
                   out.data, profile.data,
                   cl_center, cl_ps,
                   np_float(z_sample), np_float(lam),
                   np.int32(exponent), np.int32(compute_phase), np.int32(is_parabola))

    if block:
        ev.wait()
def get_roots(self, coeffs, interval, previous_coeffs=None, next_coeffs=None):
    """Compute roots for *coeffs* within *interval* by ``roots_kernel``.

    Missing *previous_coeffs* or *next_coeffs* are substituted by a list of
    ``poly_deg + 1`` NaNs. All coefficient sets are uploaded by ``get_coeffs_mem``, the
    kernel is executed as one work item on ``cfg.OPENCL.queue`` and five values are read
    back from ``self.roots_mem`` into the returned numpy array.
    """
    queue = cfg.OPENCL.queue

    if previous_coeffs is None:
        previous_coeffs = (self.poly_deg + 1) * [np.nan]
    if next_coeffs is None:
        next_coeffs = (self.poly_deg + 1) * [np.nan]

    previous_mem = get_coeffs_mem(previous_coeffs)
    next_mem = get_coeffs_mem(next_coeffs)
    current_mem = get_coeffs_mem(coeffs)

    kernel_args = (
        self.roots_mem,
        previous_mem,
        current_mem,
        next_mem,
        g_util.make_vfloat2(interval[0], interval[1]),
        cfg.PRECISION.np_float(self.pixel_size),
    )
    self.prg.roots_kernel(queue, (1,), None, *kernel_args)

    roots = np.empty(5, dtype=cfg.PRECISION.np_float)
    cl.enqueue_copy(cfg.OPENCL.queue, roots, self.roots_mem)

    return roots
def _transfer(self, shape, pixel_size, energy, offset, exponent=False, t=None, queue=None,
              out=None, check=True, block=False):
    """Compute the flat field wavefield by the ``make_flat`` kernel.

    The source position is the first trajectory control point, or the trajectory point
    at time *t* if given, shifted by *offset* given as (y, x). A vertical profile for
    *energy* is computed on the host and uploaded via *queue* (``cfg.OPENCL.queue`` if
    None). *out* is allocated as a complex array if it is None.

    The kernel computes the exponent when *exponent* is True, or when *check* is True
    and the phase profile is not 'plane'. In the latter case the sampling is checked
    and an error is logged if it is insufficient. If *exponent* is False, the
    exponential of the result is taken afterwards, thus the returned *out* array is
    different from the input one. If *block* is True, wait for the kernel to finish.
    """
    if queue is None:
        queue = cfg.OPENCL.queue
    np_float = cfg.PRECISION.np_float
    ps = make_tuple(pixel_size)

    if t is None:
        point = self.trajectory.control_points.simplified.magnitude[0]
    else:
        point = self.trajectory.get_point(t).simplified.magnitude
    x, y, z = point
    x += offset[1].simplified.magnitude
    y += offset[0].simplified.magnitude

    cl_center = gutil.make_vfloat3(x, y, z)
    # (y, x) on the host, (x, y) in the kernel
    cl_ps = gutil.make_vfloat2(*pixel_size.simplified.magnitude[::-1])

    y_positions = np.arange(0, shape[0]) * ps[0] - y * q.m
    angles = np.arctan((y_positions / self.sample_distance).simplified)
    host_profile = self._create_vertical_profile(energy, angles, ps[0])
    host_profile = host_profile.rescale(1 / q.s).magnitude
    profile = cl_array.to_device(queue, host_profile.astype(np_float))

    if out is None:
        out = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_cplx)

    z_sample = self.sample_distance.simplified.magnitude
    lam = energy_to_wavelength(energy).simplified.magnitude
    phase = self.phase_profile != 'plane'
    parabola = self.phase_profile == 'parabola'

    # The exponent is needed either on explicit request or for the sampling check
    via_exponent = exponent or (check and phase)
    exponent_flag = True if via_exponent else exponent

    ev = cfg.OPENCL.programs['physics'].make_flat(queue, shape[::-1], None,
                                                  out.data, profile.data,
                                                  cl_center, cl_ps,
                                                  np_float(z_sample), np_float(lam),
                                                  np.int32(exponent_flag),
                                                  np.int32(phase),
                                                  np.int32(parabola))

    if via_exponent:
        if check and phase and not is_wavefield_sampling_ok(out, queue=queue):
            LOG.error('Insufficient beam phase sampling')
        if not exponent:
            # The caller wants the wavefield itself, not its exponent
            out = clmath.exp(out, queue=queue)

    # NOTE(review): only the make_flat event is waited for, not the clmath.exp above -
    # confirm this is sufficient for blocking callers.
    if block:
        ev.wait()

    return out
def _project(self, shape, pixel_size, offset, t=None, queue=None, out=None, block=False):
    """Projection implementation.

    Compute the projected thickness on an image plane with *shape* and *pixel_size*,
    shifted by physical *offset* given as (y, x). *t* is not used here. Use OpenCL
    *queue* (``cfg.OPENCL.queue`` if not specified) and *out* pyopencl Array for the
    result (a zero-filled one is allocated if it is None). The kernel is launched only
    if the object extrema overlap the field of view; if *block* is True, wait for it to
    finish. Return the output array.
    """
    def get_crop(index, fov):
        """Intersect object extrema with *fov* along axis *index* (0 is x, 1 is y) and
        make the result relative to the offset.
        """
        minimum = max(self.extrema[index][0], fov[index][0])
        maximum = min(self.extrema[index][1], fov[index][1])
        return minimum - offset[::-1][index], maximum - offset[::-1][index]

    def get_px_value(value, round_func, ps):
        """Convert physical *value* to pixels of size *ps*, rounded by *round_func*."""
        return int(round_func(get_magnitude(value / ps)))

    # The default queue must be resolved before allocation and kernel launch, the same
    # way the other GPU helpers do it.
    if queue is None:
        queue = cfg.OPENCL.queue

    # Move to the desired location, apply the T matrix and resort the triangles
    self.transform()
    self.sort()

    psm = pixel_size.simplified.magnitude
    fov = offset + shape * pixel_size
    # Rows are (start, end) of the field of view, row 0 for x and row 1 for y
    fov = np.concatenate(
        (offset.simplified.magnitude[::-1],
         fov.simplified.magnitude[::-1])).reshape(2, 2).transpose() * q.m
    if out is None:
        out = cl_array.zeros(queue, shape, dtype=cfg.PRECISION.np_float)

    if (self.extrema[0][0] < fov[0][1] and self.extrema[0][1] > fov[0][0] and
            self.extrema[1][0] < fov[1][1] and self.extrema[1][1] > fov[1][0]):
        # Object inside FOV
        x_min, x_max = get_crop(0, fov)
        y_min, y_max = get_crop(1, fov)
        x_min_px = get_px_value(x_min, np.floor, pixel_size[1])
        x_max_px = get_px_value(x_max, np.ceil, pixel_size[1])
        y_min_px = get_px_value(y_min, np.floor, pixel_size[0])
        y_max_px = get_px_value(y_max, np.ceil, pixel_size[0])
        width = min(x_max_px - x_min_px, shape[1])
        height = min(y_max_px - y_min_px, shape[0])
        compute_offset = cl_array.vec.make_int2(x_min_px, y_min_px)
        v_1, v_2, v_3 = self._make_inputs(queue, pixel_size)
        max_dx = self.max_triangle_x_diff.simplified.magnitude / psm[1]
        # Use the same pixel size as for the x-axis, which will work for objects "not too far"
        # from the imaging plane
        min_z = self.extrema[2][0].simplified.magnitude / psm[1]
        cl_offset = gutil.make_vfloat2(
            *(offset / pixel_size).simplified.magnitude[::-1])

        ev = cfg.OPENCL.programs['mesh'].compute_thickness(
            queue,
            (width, height),
            None,
            v_1.data,
            v_2.data,
            v_3.data,
            out.data,
            np.int32(self.num_triangles),
            np.int32(shape[1]),
            compute_offset,
            cl_offset,
            cfg.PRECISION.np_float(psm[1]),
            cfg.PRECISION.np_float(max_dx),
            cfg.PRECISION.np_float(min_z),
            np.int32(self.iterations))
        # There is an event to wait for only when the kernel was launched
        if block:
            ev.wait()

    return out
def compute_propagator(size, distance, lam, pixel_size, fresnel=True, region=None,
                       apply_phase_factor=False, mollified=True, queue=None, block=False):
    """Create a propagator with (*size*, *size*) dimensions for propagation *distance*,
    wavelength *lam* and *pixel_size*. If *fresnel* is True, use the Fresnel approximation, if it
    is False, use the full propagator (don't approximate the square root). *region* is the diameter
    of the the wavefront area which is capable of interference. If *apply_phase_factor* is True,
    apply the phase factor defined by Fresnel approximation. If *mollified* is True the aliased
    frequencies are suppressed. If command *queue* is specified, execute the kernel on it. If
    *block* is True, wait for the kernel to finish.

    Raise :class:`ValueError` if *size* is odd. Sampling problems are only logged.
    """
    if size % 2:
        raise ValueError('Only even sizes are supported')
    if queue is None:
        queue = cfg.OPENCL.queue
    pixel_size = make_tuple(pixel_size)

    def check_cutoff(ps):
        """Log an error if the propagator is badly sampled for pixel size *ps*."""
        # Check the sampling
        r_cutoff = compute_aliasing_limit(size, lam, ps, distance, fov=region, fourier=False)
        min_n = 4
        if r_cutoff < min_n:
            LOG.error('Propagator too narrow, propagation distance too small or pixel size too large')
        f_cutoff = compute_aliasing_limit(size, lam, ps, distance, fov=region, fourier=True)
        if f_cutoff < min_n:
            LOG.error('Propagator too wide, propagation distance too large or pixel size too small')

    check_cutoff(pixel_size[1])
    check_cutoff(pixel_size[0])

    out = cl_array.Array(queue, (size, size), cfg.PRECISION.np_cplx)
    if apply_phase_factor:
        phase_factor = np.exp(2 * np.pi * distance.simplified / lam.simplified * 1j)
    else:
        phase_factor = 0 + 0j

    # The global work size must consist of integers, true division would produce floats
    # on Python 3. *size* is even here, so floor division is exact.
    half_size = size // 2 + 1
    ev = cfg.OPENCL.programs['physics'].propagator(queue,
                                                   (half_size, half_size),
                                                   None,
                                                   out.data,
                                                   cfg.PRECISION.np_float(distance.simplified),
                                                   cfg.PRECISION.np_float(lam.simplified),
                                                   g_util.make_vfloat2(*pixel_size[::-1].simplified),
                                                   g_util.make_vcomplex(phase_factor),
                                                   np.int32(fresnel))
    if block:
        ev.wait()

    if mollified:
        def compute_sigma_component(ps):
            """Return the mollifier sigma for pixel size *ps*, limited by *region* if
            it is specified.
            """
            fwtm = compute_aliasing_limit(size, lam, ps, distance, fov=size * ps, fourier=True)
            if region is not None:
                fwtm_region = compute_aliasing_limit(size, lam, ps, distance, region, fourier=True)
                fwtm = min(fwtm_region, fwtm)
            sigma = fwnm_to_sigma(fwtm, n=10)

            return sigma

        sigma = (compute_sigma_component(pixel_size[0]), compute_sigma_component(pixel_size[1]))
        mollifier = get_gauss_2d(size, sigma, fourier=False, queue=queue, block=block)
        out = out * mollifier

    return out
def compute_propagator(size, distance, lam, pixel_size, fresnel=True, region=None,
                       apply_phase_factor=False, mollified=True, queue=None, block=False):
    """Compute a (*size*, *size*) free-space propagator on the OpenCL device.

    *distance* is the propagation distance, *lam* the wavelength and *pixel_size* the
    pixel spacing. With *fresnel* True the Fresnel approximation is used, otherwise the
    full propagator (no approximation of the square root). *region* is the diameter of
    the wavefront area which is capable of interference. If *apply_phase_factor* is
    True, the phase factor defined by the Fresnel approximation is applied. If
    *mollified* is True, the aliased frequencies are suppressed by multiplying the
    result with a Gaussian. The kernel runs on *queue* (``cfg.OPENCL.queue`` if None);
    if *block* is True, wait for it to finish.

    Raise :class:`ValueError` if *size* is odd. Sampling problems are only logged as
    warnings.
    """
    if size % 2:
        raise ValueError("Only even sizes are supported")
    if queue is None:
        queue = cfg.OPENCL.queue
    pixel_size = make_tuple(pixel_size)

    def check_cutoff(ps):
        """Warn if the propagator is badly sampled for pixel size *ps*."""
        min_n = 4
        # Real space first, Fourier space second
        checks = (
            (False,
             "Propagator too narrow, propagation distance too small or pixel size too large"),
            (True,
             "Propagator too wide, propagation distance too large or pixel size too small"),
        )
        for in_fourier, message in checks:
            cutoff = compute_aliasing_limit(size, lam, ps, distance, fov=region,
                                            fourier=in_fourier)
            if cutoff < min_n:
                LOG.warning(message)

    for component in (pixel_size[1], pixel_size[0]):
        check_cutoff(component)

    out = cl_array.Array(queue, (size, size), cfg.PRECISION.np_cplx)
    phase_factor = (
        np.exp(2 * np.pi * distance.simplified / lam.simplified * 1j)
        if apply_phase_factor
        else 0 + 0j
    )

    # The kernel is launched on one quadrant including the central row and column
    work_size = (size // 2 + 1, size // 2 + 1)
    propagator_kernel = cfg.OPENCL.programs["physics"].propagator
    ev = propagator_kernel(queue, work_size, None,
                           out.data,
                           cfg.PRECISION.np_float(distance.simplified),
                           cfg.PRECISION.np_float(lam.simplified),
                           g_util.make_vfloat2(*pixel_size[::-1].simplified),
                           g_util.make_vcomplex(phase_factor),
                           np.int32(fresnel))
    if block:
        ev.wait()

    if not mollified:
        return out

    def compute_sigma_component(ps):
        """Return the mollifier sigma for pixel size *ps*, limited by *region* if it is
        specified.
        """
        fwtm = compute_aliasing_limit(size, lam, ps, distance, fov=size * ps, fourier=True)
        if region is not None:
            fwtm_region = compute_aliasing_limit(size, lam, ps, distance, region,
                                                 fourier=True)
            fwtm = min(fwtm_region, fwtm)

        return fwnm_to_sigma(fwtm, n=10)

    sigma = tuple(compute_sigma_component(ps) for ps in (pixel_size[0], pixel_size[1]))
    mollifier = get_gauss_2d(size, sigma, fourier=False, queue=queue, block=block)

    return out * mollifier