def shuffle_down(val, width):
    """Call ``roc.ds_permute`` on ``val`` with the index ``local id + width``.

    The calling work-item's id is read with ``roc.get_local_id(0)``,
    ``roc.wavebarrier()`` is issued, and ``val`` is then handed to
    ``roc.ds_permute`` together with ``(local id + width) % WAVESIZE``.

    Args:
        val: Value passed to ``roc.ds_permute`` as its second argument.
        width: Offset added to the local id before the modulo by
            ``WAVESIZE``.

    Returns:
        The result of ``roc.ds_permute`` for this work-item.
    """
    # NOTE(review): the name says "down" but the index is the local id *plus*
    # ``width``; another ``shuffle_down`` in this file uses ``tid - width``
    # with ``_WAVESIZE`` -- confirm which name/direction is intended.
    lane = roc.get_local_id(0)
    # The barrier is issued before the permute, as in the original ordering.
    roc.wavebarrier()
    return roc.ds_permute((lane + width) % WAVESIZE, val)
def shuffle_down(val, width):
    """Call ``roc.ds_permute`` on ``val`` with the index ``local id - width``.

    Reads the calling work-item's id via ``roc.get_local_id(0)``, issues
    ``roc.wavebarrier()``, then passes ``(local id - width) % _WAVESIZE``
    and ``val`` to ``roc.ds_permute``.

    Args:
        val: Value passed to ``roc.ds_permute`` as its second argument.
        width: Offset subtracted from the local id before the modulo by
            ``_WAVESIZE``.

    Returns:
        The result of ``roc.ds_permute`` for this work-item.
    """
    # NOTE(review): an earlier ``shuffle_down`` in this file uses
    # ``tid + width`` and ``WAVESIZE`` (no underscore). If both live in the
    # same scope, this definition replaces that one -- confirm intent.
    local_id = roc.get_local_id(0)
    roc.wavebarrier()
    target = (local_id - width) % _WAVESIZE
    return roc.ds_permute(target, val)
def foo(inp, mask, out):
    """Store ``roc.ds_permute(inp[i], mask[i])`` into ``out[i]``.

    ``i`` is the calling work-item's id as returned by
    ``roc.get_local_id(0)``; each work-item reads one element from ``inp``
    and ``mask`` and writes one element of ``out``.

    Args:
        inp: Indexable input; ``inp[i]`` is the first argument to
            ``roc.ds_permute``.
        mask: Indexable input; ``mask[i]`` is the second argument to
            ``roc.ds_permute``.
        out: Indexable output written in place at position ``i``.
    """
    # NOTE(review): elsewhere in this file the first ``ds_permute`` argument
    # is a computed index and the second is the value being moved; here
    # ``inp`` supplies the first and ``mask`` the second -- confirm the
    # parameter names match the intended roles.
    lane = roc.get_local_id(0)
    first = inp[lane]
    second = mask[lane]
    out[lane] = roc.ds_permute(first, second)