def __init__(self, lst_svobjs, objIdOffset):
    """Build the combined shader-side view over a list of objects.

    Args:
        lst_svobjs: Sequence of objects handed to ``vki.SVObjBuffer``; its
            length is stored as the ``size`` member.
        objIdOffset: Value wrapped in ``vki.SVUInt32`` and exposed to the
            shader as the ``id_offset`` member.
    """
    self.m_size = vki.SVUInt32(len(lst_svobjs))
    self.m_buf = vki.SVObjBuffer(lst_svobjs)
    self.m_id_offset = vki.SVUInt32(objIdOffset)

    # Members of the generated GLSL struct, keyed by their field names.
    # `id_offset` is passed along even though the accessors below do not
    # reference it.
    members = {
        'size': self.m_size,
        'data': self.m_buf,
        'id_offset': self.m_id_offset,
    }

    # GLSL accessors for the struct. `{0}` is filled with the element type
    # name; the doubled braces are str.format escapes for literal braces.
    # `#hash#` is left untouched here -- presumably SVCombine_Create
    # substitutes it to make the struct name unique (TODO confirm).
    accessors = ''' uint get_size(in Comb_#hash# vec) {{ return vec.size; }} {0} get_value(in Comb_#hash# vec, in uint id) {{ return vec.data[id].v; }} '''.format(self.name_elem_type())

    self.m_cptr = SVCombine_Create(members, accessors)
# Sum-reduce the device vector `darr` on the GPU and print the total.
#
# Shader, per work group:
#   * every invocation whose global index i is below n copies src[i] into the
#     shared array s_buf (one slot per local invocation);
#   * the stride s is halved from BLOCK_SIZE/2 down to 1, and slot tid
#     accumulates slot tid+s; the `i+s<n` guard skips slots that were never
#     written because they lie past the end of the input;
#   * local invocation 0 stores the group's partial sum at
#     dst[gl_WorkGroupID.x].
# NOTE: the halving stride only visits every slot when BLOCK_SIZE is a power
# of two.
kernel = vki.Computer(
    ['dst', 'src', 'n'],
    ''' shared {0} s_buf[{1}]; void main() {{ uint tid = gl_LocalInvocationID.x; uint i = gl_GlobalInvocationID.x; if (i<n) s_buf[tid] = get_value(src, i); barrier(); for (uint s = {1}/2; s>0; s>>=1) {{ if (tid < s && i+s<n) s_buf[tid] += s_buf[tid + s]; barrier(); }} if (tid==0) set_value(dst, gl_WorkGroupID.x, s_buf[tid]); }} '''.format('int', BLOCK_SIZE))

# Each pass writes one partial sum per work group; keep feeding the partial
# sums back in until a single value remains.
dst = darr
while dst.size() > 1:
    src = dst
    n = src.size()
    # Ceiling division in pure integer arithmetic: float `/` followed by
    # int() can round incorrectly once the operands exceed float precision.
    blocks = (n + BLOCK_SIZE - 1) // BLOCK_SIZE
    dst = vki.SVVector("int", blocks)
    # Presumably `blocks` work groups of BLOCK_SIZE invocations each
    # (TODO confirm against vki.Computer.launch).
    kernel.launch(blocks, BLOCK_SIZE, [dst, src, vki.SVUInt32(n)])

print(dst.to_host()[0])