예제 #1
0
        def udt(output):
            global_id = roc.get_global_id(0)
            global_size = roc.get_global_size(0)
            local_id = roc.get_local_id(0)
            group_id = roc.get_group_id(0)
            num_groups = roc.get_num_groups(0)
            workdim = roc.get_work_dim()
            local_size = roc.get_local_size(0)

            output[0, group_id, local_id] = global_id
            output[1, group_id, local_id] = global_size
            output[2, group_id, local_id] = local_id
            output[3, group_id, local_id] = local_size
            output[4, group_id, local_id] = group_id
            output[5, group_id, local_id] = num_groups
            output[6, group_id, local_id] = workdim
예제 #2
0
    def kernel_reduce_min(inp, out, nelem):
        tid = roc.get_local_id(0)
        blkid = roc.get_group_id(0)
        blksz = roc.get_local_size(0)
        numgroup = roc.get_num_groups(0)

        i = blkid * blksz + tid

        accum = dtype(POS_INF)
        while i < nelem:
            accum = min(accum, inp[i])
            i += blksz * numgroup

        accum = group_reducer(accum)
        if tid == 0:
            out[blkid] = accum