def take(ary, indices, backend=None):
    """Gather the elements of ``ary`` located at ``indices``.

    Both arguments are read through their ``.dev`` attribute, which is handed
    to the ``take`` routine of the selected backend. The gathered result is
    passed through ``wrap_array`` before being returned.

    :arg ary: wrapped source array; its ``backend`` attribute is used when
        *backend* is not given.
    :arg indices: wrapped array holding the positions to gather.
    :arg backend: ``'opencl'``, ``'cuda'`` or ``'cython'``; defaults to
        ``ary.backend``.
    :return: ``wrap_array(out, backend)`` where ``out`` is the gathered data.
    :raises ValueError: if *backend* is not one of the supported names.
    """
    if backend is None:
        backend = ary.backend

    if backend == 'opencl':
        import pyopencl.array as gpuarray
        out = gpuarray.take(ary.dev, indices.dev)
    elif backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        out = gpuarray.take(ary.dev, indices.dev)
    elif backend == 'cython':
        out = np.take(ary.dev, indices.dev)
    else:
        # Without this branch an unknown backend left `out` unbound and the
        # function died with an UnboundLocalError on the return statement.
        raise ValueError('Unsupported backend: %r' % (backend,))

    return wrap_array(out, backend)
def process_l2qbxl(self, queue, geo_data, l2qbxl_cost):
    """Combine per-level ``l2qbxl_cost`` with the QBX center counts.

    :arg queue: forwarded to :meth:`get_nqbx_centers_per_tgt_box` and to
        both ``take`` calls.
    :arg geo_data: provides ``tree()`` and ``traversal()``.
    :arg l2qbxl_cost: array indexed by box level.
    :return: elementwise product of the per-target-box center counts and the
        cost looked up at the level of each target box.
    """
    tree = geo_data.tree()
    trav = geo_data.traversal()

    centers_per_tgt_box = self.get_nqbx_centers_per_tgt_box(queue, geo_data)

    # The two gathers below spell out the indexing expression
    #     l2qbxl_cost[tree.box_levels[trav.target_boxes]]
    tgt_box_levels = take(tree.box_levels, trav.target_boxes, queue=queue)
    cost_per_tgt_box = take(l2qbxl_cost, tgt_box_levels, queue=queue)

    return centers_per_tgt_box * cost_per_tgt_box
def __call__(self, im, nrays, nsamples, ray_step, seed_pt, cutoff, thresh):
    """Sample rays around ``seed_pt`` and scan each ray for a boundary.

    :arg im: image object handed unchanged to the ``sample_rays`` kernel.
    :arg nrays: number of rays (coerced to ``int``).
    :arg nsamples: number of samples per ray (coerced to ``int``).
    :arg ray_step: forwarded to :meth:`build_program`.
    :arg seed_pt: indexable whose first two entries are passed to the
        sampling kernel as ``float32``.
    :arg cutoff: size of the inner region used for the statistics (coerced
        to ``np.int32``).
    :arg thresh: multiplier applied to the inner-region standard deviation
        to obtain the threshold given to ``scan_boundary``.
    :return: ``arrays.result.get()`` after the boundary scan.
    """
    nrays = int(nrays)
    nsamples = int(nsamples)
    cutoff = np.int32(cutoff)

    bufs = self.setup_arrays(nrays, nsamples, cutoff)
    program = self.build_program(nrays, nsamples, ray_step)

    seed_x = np.float32(seed_pt[0])
    seed_y = np.float32(seed_pt[1])
    program.sample_rays(
        self.queue, (nsamples, nrays), None,
        bufs.scratch.data, im, seed_x, seed_y)

    # Gather the samples that lie inside the cutoff zone, then square them
    # so that both the mean and the mean of squares can be reduced.
    cla.take(bufs.scratch, bufs.idx, out=bufs.pre_cutoff)
    self.square_array(bufs.pre_cutoff, bufs.pre_cutoff_squared)

    n_inside = cutoff * nrays
    mean_inside = cla.sum(bufs.pre_cutoff).get() / n_inside
    mean_sq_inside = cla.sum(bufs.pre_cutoff_squared).get() / n_inside

    # std = sqrt(E[x^2] - E[x]^2)
    std_inside = np.sqrt(mean_sq_inside - mean_inside ** 2)
    scaled_thresh = std_inside * thresh

    program.scan_boundary(
        self.queue, (nrays,), None,
        bufs.result.data, bufs.scratch.data, np.float32(scaled_thresh))

    return bufs.result.get()
def roulette_wheel_selection(self, population_size=32):
    """Draw ``population_size`` vehicle indexes by roulette-wheel selection.

    Scores in ``self.vehicle_score`` are normalised by their sum, scanned
    into running totals, and sampled with uniform random numbers by the
    ``roulette_wheel_selection`` kernel of ``self.program``.

    :arg population_size: number of indexes to draw.
    :return: a :class:`pyopencl.array.Array` of ``uint32`` with
        ``population_size`` entries, or ``None`` when the total score is
        not positive.
    """
    # calculate sum over all scores
    total_score = pyopencl.array.sum(self.vehicle_score).get()
    if not total_score > 0:
        # Nothing to normalise by; callers get None, as before.
        return None

    from pyopencl.elementwise import ElementwiseKernel
    roulette_wheel_probabilities = ElementwiseKernel(
        self.context,
        "float total_score, float *scores, "
        "float *probabilities",
        "probabilities[i] = scores[i]/total_score",
        "roulette_wheel_probabilities")

    probabilities = pyopencl.array.empty_like(self.vehicle_score)
    roulette_wheel_probabilities(
        total_score, self.vehicle_score, probabilities)

    # Running sum over the normalised scores; the scanned `probabilities`
    # buffer is what the selection kernel receives below.
    accumulated_probabilities_kernel = pyopencl.scan.InclusiveScanKernel(
        self.context, numpy.float32, "a+b")
    accumulated_probabilities_kernel(probabilities)

    selection_probabilities = pyopencl.array.Array(
        self.queue, population_size, dtype=numpy.float32)
    pyopencl.clrandom.RanluxGenerator(
        self.queue, self.work_items, seed=time.time()
    ).fill_uniform(selection_probabilities)

    population_indexes = pyopencl.array.Array(
        self.queue, population_size, dtype=numpy.uint32)

    self.program.roulette_wheel_selection(
        self.queue, (population_size,), None,
        selection_probabilities.data, probabilities.data,
        population_indexes.data, numpy.uint32(self.number_of_cars))

    # The gather of the selected scores that used to follow here produced a
    # value that was never read, so it has been dropped.
    return population_indexes
def test_take(ctx_getter):
    """Check ``cl_array.take`` against a closed-form expectation."""
    ctx = ctx_getter()
    cmd_queue = cl.CommandQueue(ctx)

    # values[k] == 3 * k, so gathering at `indices` must yield 3 * indices.
    indices = cl_array.arange(
        ctx, cmd_queue, 0, 200000, 2, dtype=numpy.uint32)
    values = cl_array.arange(
        ctx, cmd_queue, 0, 600000, 3, dtype=numpy.float32)

    gathered = cl_array.take(values, indices)

    expected = (3*indices).get()
    assert (expected == gathered.get()).all()
def test_take(ctx_factory):
    """Check ``cl_array.take`` against a closed-form expectation."""
    ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(ctx)

    # values[k] == 3 * k, so gathering at `indices` must yield 3 * indices.
    indices = cl_array.arange(cmd_queue, 0, 200000, 2, dtype=np.uint32)
    values = cl_array.arange(cmd_queue, 0, 600000, 3, dtype=np.float32)

    gathered = cl_array.take(values, indices)

    expected = (3 * indices).get()
    assert (expected == gathered.get()).all()
def get_nqbx_centers_per_tgt_box(self, queue, geo_data, weights=None):
    """Count the (optionally weighted) global QBX centers in each target box.

    :arg queue: a :class:`pyopencl.CommandQueue` object.
    :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData`
        object.
    :arg weights: optional :class:`pyopencl.array.Array` of shape
        ``(ncenters,)`` and dtype *particle_id_dtype* holding one weight per
        center in user order.
    :return: a :class:`pyopencl.array.Array` of shape ``(ntarget_boxes,)``
        and dtype *particle_id_dtype*; entry *i* is the number of
        ``geo_data.global_qbx_centers`` found in ``target_boxes[i]``, scaled
        by *weights* when given.
    """
    trav = geo_data.traversal()
    tree = geo_data.tree()
    qbx_centers = geo_data.global_qbx_centers()
    ncenters = geo_data.ncenters

    # Per-target mask: 1 where the target is a global QBX center, else 0.
    qbx_centers_tree_order = take(
        tree.sorted_target_ids, qbx_centers, queue=queue)
    center_weight = cl.array.zeros(
        queue, tree.ntargets, dtype=tree.particle_id_dtype)
    self._fill_array_with_index(
        queue, center_weight, qbx_centers_tree_order, 1)

    if weights is not None:
        assert weights.dtype == tree.particle_id_dtype
        # Scale the entries belonging to centers by the caller's weights.
        center_weight[tree.sorted_target_ids[:ncenters]] *= weights

    # Every target box walks its own target list and accumulates the weights
    # of the global QBX centers it finds there.
    ntarget_boxes = len(trav.target_boxes)
    centers_per_tgt_box = cl.array.empty(
        queue, ntarget_boxes, dtype=tree.particle_id_dtype)

    count_knl = self.count_global_qbx_centers_knl(
        queue.context, tree.box_id_dtype, tree.particle_id_dtype)
    count_knl(
        centers_per_tgt_box,
        center_weight,
        trav.target_boxes,
        tree.box_target_starts,
        tree.box_target_counts_nonchild,
        queue=queue)

    return centers_per_tgt_box