def _run(self):
    """Enqueue ``self.kernel`` on the ``layer``, ``image`` and ``kernel1d`` buffers.

    The launch covers ``half_size`` x ``half_size`` work-items, where
    ``half_size`` is half the last dimension of ``image``; each dimension is
    passed through ``accel.roundup`` together with the workgroup size.
    """
    layer = self.buffer('layer')
    image = self.buffer('image')
    # The layer must share the padded layout of one 2D slice of the image,
    # because a single row stride is handed to the kernel for both.
    assert layer.padded_shape == image.padded_shape[-2:]
    kernel1d = self.buffer('kernel1d')

    template = self.template
    real = template.real_dtype.type
    half = image.shape[-1] // 2
    stride_y = image.padded_shape[-1]
    stride_z = stride_y * image.padded_shape[-2]

    args = [
        image.buffer,
        layer.buffer,
        # Element offset of slice number ``self.polarization`` in the image
        np.int32(self.polarization * stride_z),
        np.int32(stride_y),
        kernel1d.buffer,
        real(self.lm_scale),
        real(self.lm_bias),
        np.int32(half),
        np.int32(half * stride_y),
        real(half * self.lm_scale),
        real(2 * self.w),
    ]
    local_size = (template.wgs_x, template.wgs_y)
    global_size = tuple(accel.roundup(half, wgs) for wgs in local_size)

    with profile_device(self.command_queue, self.kernel_name):
        self.command_queue.enqueue_kernel(
            self.kernel, args,
            global_size=global_size, local_size=local_size)
def _run(self):
    """Enqueue the ``fourier_beam`` kernel on the ``data`` buffer.

    The kernel receives an amplitude and the three distinct coefficients of
    a symmetric 2x2 exponent matrix, both derived from ``self.beam`` and
    ``self.image_shape``.

    Raises
    ------
    ValueError
        If ``self.beam`` has not been set.
    """
    if self.beam is None:
        raise ValueError('Must set beam')

    cov_sqrt = beam_covariance_sqrt(self.beam)
    # The determinant may come out negative (see
    # https://github.com/astropy/astropy/issues/1105), so take its
    # absolute value.
    amplitude = 2 * np.pi * self.beam.model.amplitude * np.abs(
        np.linalg.det(cov_sqrt))
    # CUFFT, unlike numpy.fft, does not normalize the inverse transform,
    # so the normalization is folded into the amplitude here.
    amplitude /= self.image_shape[0] * self.image_shape[1]
    # The kernel works in integral coordinates, which need converting to
    # the range [-1, 1). That scaling is folded into the matrix.
    cov_sqrt *= np.asmatrix(
        np.diag([1.0 / self.image_shape[0], 1.0 / self.image_shape[1]]))
    # Overall matrix for the exponent
    exponent = -2 * np.pi**2 * cov_sqrt.T * cov_sqrt

    real = self.template.dtype.type
    data = self.buffer('data')
    rows, cols = data.shape[0], data.shape[1]
    args = [
        data.buffer,
        np.int32(data.padded_shape[1]),
        real(amplitude),
        real(exponent[0, 0]),
        # Both off-diagonal entries are merged into one cross-term coefficient
        real(2 * exponent[0, 1]),
        real(exponent[1, 1]),
        np.int32(cols),
        np.int32(rows),
    ]
    local_size = (self.template.wgs_x, self.template.wgs_y)
    global_size = (accel.roundup(cols, local_size[0]),
                   accel.roundup(rows, local_size[1]))

    with profile_device(self.command_queue, 'fourier_beam'):
        self.command_queue.enqueue_kernel(
            self._kernel, args,
            global_size=global_size, local_size=local_size)
def _run(self):
    """Enqueue the ``predict`` kernel with one work-item range over the visibilities.

    Does nothing when there are no visibilities or no sources.
    """
    if self.num_vis == 0 or self.num_sources == 0:
        return

    buffer_names = ('uv', 'w_plane', 'lmn', 'flux', 'vis', 'weights')
    uv, w_plane, lmn, flux, vis, weights = (
        self.buffer(name) for name in buffer_names)

    uv_scale, w_scale, w_bias = _uvw_scale_bias(
        self.image_parameters, self.grid_parameters)
    w_bias += self._w

    wgs = self.template.wgs
    args = [
        vis.buffer,
        uv.buffer,
        w_plane.buffer,
        weights.buffer,
        lmn.buffer,
        flux.buffer,
        np.int32(self.num_vis),
        np.int32(self.num_sources),
        np.int32(self.grid_parameters.fixed.oversample),
        np.float32(uv_scale),
        np.float32(w_scale),
        np.float32(w_bias),
    ]

    with profile_device(self.command_queue, 'predict'):
        self.command_queue.enqueue_kernel(
            self._kernel, args,
            global_size=(accel.roundup(self.num_vis, wgs),),
            local_size=(wgs,))
def _run(self):
    """Enqueue ``self.kernel`` on the ``data`` buffer with ``self.scale``.

    The 1D launch covers the first dimension of ``data``, passed through
    ``roundup`` together with the workgroup size ``self.WGS``.
    """
    data = self.buffer('data')
    args = [data.buffer, self.scale]
    global_size = (roundup(data.shape[0], self.WGS),)
    local_size = (self.WGS,)
    self.command_queue.enqueue_kernel(
        self.kernel, args, global_size=global_size, local_size=local_size)
def _run(self):
    """Enqueue the ``scale`` kernel on the ``data`` buffer.

    The kernel receives the padded row and plane strides of ``data``, the
    sizes of its last two axes and ``self.scale_factor``.
    """
    data = self.buffer('data')
    height, width = data.shape[1], data.shape[2]
    args = [
        data.buffer,
        np.int32(data.padded_shape[2]),
        np.int32(data.padded_shape[1] * data.padded_shape[2]),
        np.int32(width),
        np.int32(height),
        self.scale_factor,
    ]
    local_size = (self.template.wgsx, self.template.wgsy)
    global_size = (accel.roundup(width, local_size[0]),
                   accel.roundup(height, local_size[1]))

    with profile_device(self.command_queue, 'scale'):
        self.command_queue.enqueue_kernel(
            self.kernel, args,
            global_size=global_size, local_size=local_size)
def _run(self):
    """Run the ``mean_weight`` kernel and return a ratio of its two sums.

    The ``sums`` buffer is zeroed, the kernel is enqueued over the last two
    axes of ``grid``, and the result is read back into ``self._sums_host``.

    Returns
    -------
    Element 1 of the host sums divided by element 0.
    """
    grid = self.buffer('grid')
    sums = self.buffer('sums')
    queue = self.command_queue
    queue.enqueue_zero_buffer(sums.buffer)

    height, width = grid.shape[1], grid.shape[2]
    args = [
        sums.buffer,
        grid.buffer,
        np.int32(grid.padded_shape[-1]),
        np.int32(width),
        np.int32(height),
    ]
    local_size = (self.template.wgs_x, self.template.wgs_y)
    global_size = (accel.roundup(width, local_size[0]),
                   accel.roundup(height, local_size[1]))

    with profile_device(queue, 'mean_weight'):
        queue.enqueue_kernel(
            self._kernel, args,
            global_size=global_size, local_size=local_size)

    sums.get(queue, self._sums_host)
    totals = self._sums_host
    return totals[1] / totals[0]
def __call__(self, pos, psf_patch, **kwargs):
    """Bind any supplied buffers and enqueue the ``subtract_psf`` kernel.

    Parameters
    ----------
    pos : tuple
        row, col in image at which to center the PSF
    psf_patch : tuple
        num_polarizations, height, width of central area of PSF to
        subtract (num_polarizations is ignored)
    **kwargs
        Forwarded to ``self.bind`` before the operation runs.
    """
    self.bind(**kwargs)
    self.ensure_all_bound()

    dirty = self.buffer('dirty')
    model = self.buffer('model')
    psf = self.buffer('psf')
    peak_pixel = self.buffer('peak_pixel')

    patch_height, patch_width = psf_patch[1], psf_patch[2]
    center_row, center_col = pos[0], pos[1]
    half_height = patch_height // 2
    half_width = patch_width // 2

    # Top-left corner of the patch when it is centered within the PSF,
    # expressed as an element offset within one padded PSF plane.
    patch_top = psf.shape[1] // 2 - half_height
    patch_left = psf.shape[2] // 2 - half_width
    psf_addr_offset = patch_top * psf.padded_shape[2] + patch_left

    args = [
        dirty.buffer,
        model.buffer,
        np.int32(dirty.padded_shape[2]),
        np.int32(dirty.padded_shape[1] * dirty.padded_shape[2]),
        np.int32(dirty.shape[2]),
        np.int32(dirty.shape[1]),
        psf.buffer,
        np.int32(psf.padded_shape[2]),
        np.int32(psf.padded_shape[1] * psf.padded_shape[2]),
        np.int32(patch_width),
        np.int32(patch_height),
        np.int32(psf_addr_offset),
        peak_pixel.buffer,
        np.int32(center_col),
        np.int32(center_row),
        # Image coordinates at which the patch's top-left corner lands
        np.int32(center_col - half_width),
        np.int32(center_row - half_height),
        np.float32(self.loop_gain),
    ]
    local_size = (self.template.wgsx, self.template.wgsy)
    global_size = (accel.roundup(patch_width, local_size[0]),
                   accel.roundup(patch_height, local_size[1]))

    with profile_device(self.command_queue, 'subtract_psf'):
        self.command_queue.enqueue_kernel(
            self.kernel, args,
            global_size=global_size, local_size=local_size)
def _run(self):
    """Enqueue the ``apply_primary_beam`` kernel on ``data`` and ``beam_power``.

    ``self.threshold`` and ``self.replacement`` are converted to the scalar
    type of ``data`` before being passed to the kernel.
    """
    data = self.buffer('data')
    beam_power = self.buffer('beam_power')

    scalar = data.dtype.type
    height, width = data.shape[1], data.shape[2]
    args = [
        data.buffer,
        beam_power.buffer,
        np.int32(data.padded_shape[2]),
        np.int32(data.padded_shape[1] * data.padded_shape[2]),
        np.int32(width),
        np.int32(height),
        scalar(self.threshold),
        scalar(self.replacement),
    ]
    local_size = (self.template.wgsx, self.template.wgsy)
    global_size = (accel.roundup(width, local_size[0]),
                   accel.roundup(height, local_size[1]))

    with profile_device(self.command_queue, 'apply_primary_beam'):
        self.command_queue.enqueue_kernel(
            self.kernel, args,
            global_size=global_size, local_size=local_size)
def _run(self):
    """Enqueue the ``add_image`` kernel on the ``dest`` and ``src`` buffers.

    Each buffer's own padded row and plane strides are passed to the kernel;
    the launch range is taken from the last two axes of ``src``.
    """
    src = self.buffer('src')
    dest = self.buffer('dest')

    height, width = src.shape[1], src.shape[2]
    args = [
        dest.buffer,
        src.buffer,
        np.int32(dest.padded_shape[2]),
        np.int32(dest.padded_shape[1] * dest.padded_shape[2]),
        np.int32(src.padded_shape[2]),
        np.int32(src.padded_shape[1] * src.padded_shape[2]),
        np.int32(width),
        np.int32(height),
    ]
    local_size = (self.template.wgsx, self.template.wgsy)
    global_size = (accel.roundup(width, local_size[0]),
                   accel.roundup(height, local_size[1]))

    with profile_device(self.command_queue, 'add_image'):
        self.command_queue.enqueue_kernel(
            self.kernel, args,
            global_size=global_size, local_size=local_size)
def _run(self):
    """Run the ``density_weights`` kernel and derive two values from its sums.

    The ``sums`` buffer is zeroed, the kernel is enqueued over the last two
    axes of ``grid`` with parameters ``self.a`` and ``self.b``, and the sums
    are read back into ``self._sums_host``.

    Returns
    -------
    tuple
        ``(rms, rms * sqrt(sums[0]))`` where
        ``rms = sqrt(sums[2]) / sums[1]``.
    """
    grid = self.buffer('grid')
    sums = self.buffer('sums')
    queue = self.command_queue
    sums.zero(queue)

    height, width = grid.shape[1], grid.shape[2]
    args = [
        sums.buffer,
        grid.buffer,
        np.int32(grid.padded_shape[2]),
        np.int32(grid.padded_shape[1] * grid.padded_shape[2]),
        np.int32(width),
        np.int32(height),
        np.float32(self.a),
        np.float32(self.b),
    ]
    local_size = (self.template.wgs_x, self.template.wgs_y)
    global_size = (accel.roundup(width, local_size[0]),
                   accel.roundup(height, local_size[1]))

    with profile_device(queue, 'density_weights'):
        queue.enqueue_kernel(
            self._kernel, args,
            global_size=global_size, local_size=local_size)

    sums.get(queue, self._sums_host)
    totals = self._sums_host
    rms = np.sqrt(totals[2]) / totals[1]
    return rms, rms * np.sqrt(totals[0])
def _run(self):
    """Enqueue the ``find_peak`` kernel.

    The kernel is given the ``dirty`` image with its padded strides, the
    ``tile_max`` / ``tile_pos`` buffers with the tile layout, and the
    ``peak_value``, ``peak_pos`` and ``peak_pixel`` buffers.
    """
    dirty = self.buffer('dirty')
    tile_max = self.buffer('tile_max')
    tile_pos = self.buffer('tile_pos')
    peak_value = self.buffer('peak_value')
    peak_pos = self.buffer('peak_pos')
    peak_pixel = self.buffer('peak_pixel')

    tiles_dim0, tiles_dim1 = tile_max.shape[0], tile_max.shape[1]
    args = [
        dirty.buffer,
        np.int32(dirty.padded_shape[2]),
        np.int32(dirty.padded_shape[1] * dirty.padded_shape[2]),
        tile_max.buffer,
        tile_pos.buffer,
        np.int32(tile_max.padded_shape[1]),
        np.int32(tiles_dim1),
        np.int32(tiles_dim0),
        peak_value.buffer,
        peak_pos.buffer,
        peak_pixel.buffer,
    ]
    local_size = (self.template.wgsx, self.template.wgsy)
    # NOTE(review): the launch is sized (shape[0], shape[1]) while the
    # kernel arguments pass shape[1] before shape[0]. Confirm against the
    # kernel source that the axis order here is intended.
    global_size = (accel.roundup(tiles_dim0, local_size[0]),
                   accel.roundup(tiles_dim1, local_size[1]))

    with profile_device(self.command_queue, 'find_peak'):
        self.command_queue.enqueue_kernel(
            self.kernel, args,
            global_size=global_size, local_size=local_size)
def _run(self):
    """Enqueue the ``grid_weights`` kernel with a 1D range over the visibilities.

    The kernel receives the padded strides of ``grid`` and the element offset
    of position ``(shape[1] // 2, shape[2] // 2)`` within one padded plane.
    """
    grid = self.buffer('grid')
    uv = self.buffer('uv')
    weights = self.buffer('weights')

    row_stride = grid.padded_shape[2]
    center_row = grid.shape[1] // 2
    center_col = grid.shape[2] // 2
    address_bias = center_row * row_stride + center_col

    wgs = self.template.wgs
    args = [
        grid.buffer,
        np.int32(row_stride),
        np.int32(grid.padded_shape[1] * row_stride),
        uv.buffer,
        weights.buffer,
        np.int32(address_bias),
        np.int32(self._num_vis),
    ]

    with profile_device(self.command_queue, 'grid_weights'):
        self.command_queue.enqueue_kernel(
            self._kernel, args,
            global_size=(accel.roundup(self._num_vis, wgs),),
            local_size=(wgs,))