Ejemplo n.º 1
0
# Test script for IncrementalTileList: upload random per-tile values, build
# the tile list on the device with the GTE operator and threshold 10, read
# the list back and compare it with a reference computed on the host.
platforms = cl.get_platforms()

# NOTE(review): the second device of the first platform is hard-coded, so
# this raises IndexError on a single-device platform -- presumably it picks
# the GPU on the original author's machine; confirm before reuse.
devices = platforms[0].get_devices()
devices = [devices[1]]
context = cl.Context(devices)
queue = cl.CommandQueue(context)

global_dim = (4096, 4096)
global_shape = (global_dim[1], global_dim[0])

tileList = IncrementalTileList(context, devices, global_dim, (16, 16))
tiles_dim = tileList.dim
n_tiles = tiles_dim[0]*tiles_dim[1]

# These two are not used directly below; they are kept because constructing
# them may have side effects (TODO confirm whether they are still needed).
prefixSum = PrefixSum(context, devices, n_tiles)
streamCompact = StreamCompact(context, devices, n_tiles)

# Random per-tile values in [0, 20), stored as (tiles_dim[1], tiles_dim[0]).
hTiles = np.random.randint(0, 20, (tiles_dim[1], tiles_dim[0])).astype(np.int32)
cl.enqueue_copy(queue, tileList.d_tiles, hTiles).wait()

tileList.build(Operator.GTE, 10)

# Read back the whole device list; only the first tileList.length entries
# are compared below.
hList = np.empty((n_tiles,), np.int32)
cl.enqueue_copy(queue, hList, tileList.d_list).wait()

# Test correctness using tileList - prefixsum and streamcompact are then
# correct too.
# Host reference: linear index (row*tiles_dim[0] + col) of every tile whose
# value is >= 10. np.nonzero yields the indices in row-major order, the same
# order the original map()-based reference produced. The arithmetic is done
# on arrays because map() is a lazy iterator on Python 3 and comparing it
# with an ndarray never yields a meaningful element-wise result.
rows, cols = np.nonzero(hTiles >= 10)
compact_cpu = rows*tiles_dim[0] + cols
# array_equal also reports a length mismatch as a plain failure instead of
# attempting to broadcast.
assert np.array_equal(compact_cpu, hList[0:tileList.length])
Ejemplo n.º 2
0
# Driver script for StreamCompact: upload a random 0/1 flag array, run
# compact() on it, then copy the output list and the length buffer back to
# the host.

# Element sizes in bytes, used for sizing raw device buffers.
szFloat, szInt, szChar = 4, 4, 1
cm = cl.mem_flags

# OpenCL setup on the second device of the first platform.
platforms = cl.get_platforms()
devices = [platforms[0].get_devices()[1]]
context = cl.Context(devices)
queue = cl.CommandQueue(context)

nSamples = 65536
capcity = nSamples

streamCompact = StreamCompact(context, devices, capcity)

# Host-side receive buffers for the read-backs at the end of the script.
hList = np.empty(nSamples, dtype=np.int32)
hLength = np.empty(1, dtype=np.int32)

dList = streamCompact.listFactory(nSamples)

# Random flags drawn from {0, 1}, uploaded to a device buffer obtained from
# flagFactory.
hFlags = np.random.randint(0, 2, size=nSamples).astype(np.int32)
dFlags = streamCompact.flagFactory(nSamples)
cl.enqueue_copy(queue, dFlags, hFlags).wait()

# Device buffer sized for a single int; it is read back into hLength below.
dLength = cl.Buffer(context, cm.READ_WRITE, szInt)

streamCompact.compact(dFlags, dList, dLength, nSamples)

# Blocking read-backs, in the same order as before: list first, then length.
for host_array, device_buffer in ((hList, dList), (hLength, dLength)):
    cl.enqueue_copy(queue, host_array, device_buffer).wait()