def _get_remove_kernels(self):
    """Build the pair of kernels used to drop selected entries of an array.

    Returns
    -------
    tuple
        ``(fill_if_remove_knl, remove_knl)``:

        * the first is a ``parallel.Elementwise`` kernel that writes ``1``
          into ``if_remove`` at every position listed in ``indices``;
        * the second is a ``parallel.Scan`` kernel (``'a+b'`` over
          ``np.int32``) that scans ``if_remove`` and copies each entry of
          ``old_array`` whose flag is zero to ``new_array[i - item]``.

    Both kernels are created for ``self.backend``; the array pointer
    arguments of the scan output step are typed with ``self.gptr_type``.
    """
    import compyle.parallel as parallel

    # NOTE: the kernel functions below are handed to the compyle kernel
    # launchers, so their names, argument names and bodies are kept exactly
    # as they were; only the surrounding Python is restructured.

    # Marks the positions that have to be removed.
    @annotate(i='int', gintp='indices, if_remove')
    def fill_if_remove(i, indices, if_remove):
        if_remove[indices[i]] = 1

    marker_kernel = parallel.Elementwise(
        fill_if_remove, backend=self.backend
    )

    # Scan input: the removal flag of element ``i``.
    @annotate(i='int', if_remove='gintp', return_='int')
    def remove_input_expr(i, if_remove):
        return if_remove[i]

    output_arg_types = dict(
        i='int',
        item='int',
        if_remove='gintp',
        new_array=self.gptr_type,
        old_array=self.gptr_type,
    )

    # Scan output: ``item`` is presumably the running sum of the flags
    # supplied by Scan (TODO confirm), i.e. how far a kept element has to
    # shift towards the front.
    @annotate(**output_arg_types)
    def remove_output_expr(i, item, if_remove, new_array, old_array):
        if not if_remove[i]:
            new_array[i - item] = old_array[i]

    compaction_kernel = parallel.Scan(
        remove_input_expr,
        remove_output_expr,
        'a+b',
        dtype=np.int32,
        backend=self.backend,
    )

    return marker_kernel, compaction_kernel
def cumsum(ary, backend=None, out=None):
    """Return the cumulative sum of ``ary``.

    Parameters
    ----------
    ary
        Input array. Its ``backend`` attribute is used when ``backend`` is
        not given; on the GPU path its ``length`` and ``dtype`` are read too.
    backend : str, optional
        ``'opencl'``, ``'cuda'`` or ``'cython'``. Defaults to ``ary.backend``.
    out : optional
        Destination for the result. On the ``'opencl'``/``'cuda'`` path a new
        array is allocated with ``empty`` when this is ``None``; on the
        ``'cython'`` path it is forwarded to ``np.cumsum`` unchanged.

    Returns
    -------
    The array holding the cumulative sum: ``out`` itself on the GPU path,
    or the ``np.cumsum`` result passed through ``wrap_array`` on the
    ``'cython'`` path. For any other backend value nothing is computed and
    ``None`` is returned.
    """
    selected = ary.backend if backend is None else backend

    if selected == 'cython':
        return wrap_array(np.cumsum(ary, out=out), selected)

    if selected not in ('opencl', 'cuda'):
        # Unrecognised backend: fall through without doing any work.
        return None

    # GPU path: the cumulative sum is expressed as an 'a+b' scan.
    import compyle.parallel as parallel

    result = out
    if result is None:
        result = empty(ary.length, ary.dtype, backend=selected)

    scan_kernel = parallel.Scan(
        inp_cumsum,
        out_cumsum,
        'a+b',
        dtype=ary.dtype,
        backend=selected,
    )
    scan_kernel(ary=ary, out=result)
    return result