Example #1
0
def wave_reduce(val):
    tmp = val
    tid = hsa.get_local_id(0)
    laneid = tid & (WAVESIZE - 1)

    width = WAVESIZE // 2

    while width > 0:
        hsa.wavebarrier()
        other = hsa.activelanepermute_wavewidth(tmp, laneid + width, 0, False)
        if laneid < width:
            tmp += other

        width //= 2

    # First thread has the result
    hsa.wavebarrier()
    return hsa.activelanepermute_wavewidth(tmp, 0, 0, False)
Example #2
0
def wave_reduce(val):
    tmp = val
    tid = hsa.get_local_id(0)
    laneid = tid & (WAVESIZE - 1)

    width = WAVESIZE // 2

    while width > 0:
        hsa.wavebarrier()
        other = hsa.activelanepermute_wavewidth(tmp, laneid + width, 0, False)
        if laneid < width:
            tmp += other

        width //= 2

    # First thread has the result
    hsa.wavebarrier()
    return hsa.activelanepermute_wavewidth(tmp, 0, 0, False)
Example #3
0
 def foo(inp, mask, out):
     tid = hsa.get_local_id(0)
     out[tid] = hsa.activelanepermute_wavewidth(inp[tid], mask[tid], 0,
                                                False)
Example #4
0
def shuffle_up(val, width):
    tid = hsa.get_local_id(0)
    hsa.wavebarrier()
    res = hsa.activelanepermute_wavewidth(val, tid - width, 0, False)
    return res
def broadcast(val, src):
    hsa.wavebarrier()
    return hsa.activelanepermute_wavewidth(val, src, 0, False)
def shuffle_up(val, width):
    tid = hsa.get_local_id(0)
    hsa.wavebarrier()
    res = hsa.activelanepermute_wavewidth(val, tid - width, 0, False)
    return res
Example #7
0
 def foo(inp, mask, out):
     tid = hsa.get_local_id(0)
     out[tid] = hsa.activelanepermute_wavewidth(inp[tid], mask[tid], 0,
                                                False)