def runKernel(opt):
    """Run the ``vectorswizzle`` kernel over one buffer, one work group at a time, and verify it.

    The buffer holds ``elem_num`` C ints initialised to their own index. The
    golden data expects, within every group of four ints, positions 0 and 1
    to end up holding the values that started two slots ahead, and positions
    2 and 3 the values that started two slots behind.

    Args:
        opt: options object; ``opt.index`` selects the device and
            ``opt.bitstreamFile`` is the xclbin handed to ``load_xclbin``.

    Returns:
        0 when every element read back matches the golden data.

    Raises:
        AssertionError: if the data read back differs from the golden data.
    """
    d = pyxrt.device(opt.index)
    uuid = d.load_xclbin(opt.bitstreamFile)

    # Instantiate vectorswizzle
    swizzle = pyxrt.kernel(d, uuid, "vectorswizzle")

    elem_num = 4096
    int_bytes = ctypes.sizeof(ctypes.c_int)
    size = int_bytes * elem_num

    obj = pyxrt.bo(d, size, pyxrt.bo.normal, swizzle.group_id(0))
    # Index the mapping in units of C int so that buf[idx] addresses the
    # idx-th int rather than the idx-th byte.
    # NOTE(review): assumes obj.map() exposes the buffer as raw bytes - confirm
    # against the pyxrt bindings in use.
    buf = numpy.asarray(obj.map()).view(numpy.intc)

    # Fill the input with its own indices and compute the golden values.
    for idx in range(elem_num):
        buf[idx] = idx
    reference = [idx + 2 if idx % 4 < 2 else idx - 2 for idx in range(elem_num)]

    obj.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, size, 0)

    # Create a run object without starting kernel
    run = pyxrt.run(swizzle)

    # The range is counted in int4 vectors, so it is derived from the element
    # count. Deriving it from the byte size (as before) produced four times
    # too many work groups, whose sub-buffers ran past the end of the parent.
    global_dim = [elem_num // 4, 0]  # int4 vector count global range
    local_dim = [16, 0]  # int4 vector count per work group
    group_size = global_dim[0] // local_dim[0]

    # Run swizzle with 16 (local[0]) elements at a time
    # Each element is an int4 (sizeof(int) * 4 bytes)
    # Create sub buffer to offset kernel argument in parent buffer
    local_size_bytes = local_dim[0] * int_bytes * 4
    for group in range(group_size):
        subobj = pyxrt.bo(obj, local_size_bytes, local_size_bytes * group)
        run.set_arg(0, subobj)
        run.start()
        # NOTE(review): the state returned by wait(5) is not inspected; confirm
        # that a timeout cannot let the next start() overlap a running command.
        run.wait(5)

    obj.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE, size, 0)

    print("Compare the FPGA results with golden data")
    # Raise explicitly so the check is not stripped when running with -O.
    if not numpy.array_equal(buf[:elem_num], reference):
        bad = next(i for i in range(elem_num) if buf[i] != reference[i])
        raise AssertionError(
            "Mismatch at index %d: got %d, expected %d"
            % (bad, buf[bad], reference[bad]))

    return 0
def runKernel(opt):
    """Start the hello kernel on two separate buffers and check both outputs.

    Args:
        opt: options object providing ``index`` (device index),
            ``bitstreamFile`` (xclbin path) and ``DATA_SIZE`` (buffer size
            in bytes).

    Raises:
        AssertionError: if either buffer does not start with ``Hello World``
            after the runs.
        IndexError: if the xclbin has no kernel whose name matches the pattern.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    uuid = d.load_xclbin(xbin)

    # Use the first kernel in the xclbin whose name matches the pattern.
    pattern = re.compile("hello*")
    candidates = [k for k in xbin.get_kernels() if pattern.match(k.get_name())]
    chosen = candidates[0]
    hello = pyxrt.kernel(d, uuid, chosen.get_name(), pyxrt.kernel.shared)

    nbytes = opt.DATA_SIZE
    blank = bytearray(nbytes)

    # Allocate, zero and map the two buffers, one after the other.
    handles = []
    views = []
    for _ in range(2):
        handle = pyxrt.bo(d, nbytes, pyxrt.bo.normal, hello.group_id(0))
        handle.write(blank, 0)
        views.append(handle.map())
        handles.append(handle)

    for handle in handles:
        handle.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, nbytes, 0)

    for view in views:
        print("Original string = [%s]" % view[:64].tobytes())

    print("Issue kernel start requests")
    runs = [hello(handle) for handle in handles]

    print("Now wait for the kernels to finish using xrtRunWait()")
    for pending in runs:
        pending.wait(5)

    print("Get the output data produced by the 2 kernel runs from the device")
    for handle in handles:
        handle.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE, nbytes, 0)

    golden = memoryview(b'Hello World')
    results = [view[:len(golden)] for view in views]

    for result in results:
        print("Result string = [%s]" % result.tobytes())

    for result in results:
        assert (result == golden), "Incorrect output from kernel"
def getInputOutputBuffer(devhdl, krnlhdl, argno, isInput):
    """Allocate, initialise and upload a DATASIZE-byte buffer for one kernel argument.

    Args:
        devhdl: device the buffer object is created on.
        krnlhdl: kernel whose ``group_id(argno)`` selects the memory group.
        argno: index of the kernel argument the buffer is meant for.
        isInput: when truthy the buffer is filled with ``i % 256`` at each
            index ``i``; otherwise it is filled with zeros.

    Returns:
        Tuple ``(bo, buf)``: the buffer object and its host mapping.
    """
    handle = pyxrt.bo(devhdl, DATASIZE, pyxrt.bo.normal, krnlhdl.group_id(argno))
    view = handle.map()

    if isInput:
        # Inputs carry a repeating 0..255 ramp.
        for offset in range(DATASIZE):
            view[offset] = offset % 256
    else:
        # Outputs start cleared.
        for offset in range(DATASIZE):
            view[offset] = 0

    handle.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, DATASIZE, 0)
    return handle, view
def getInputOutputBuffer(devhdl, krnlhdl, argno, isInput):
    """Create a ``globalbuffersize``-byte buffer for a kernel argument, fill it and push it to the device.

    Args:
        devhdl: device the buffer object is created on.
        krnlhdl: kernel whose ``group_id(argno)`` selects the memory group.
        argno: index of the kernel argument the buffer is meant for.
        isInput: truthy to fill index ``i`` with ``i % 256``; falsy to fill
            with zeros.

    Returns:
        Tuple ``(bo, buf)``: the buffer object and a numpy array wrapping its
        host mapping.
    """
    memory_group = krnlhdl.group_id(argno)
    device_bo = pyxrt.bo(devhdl, globalbuffersize, pyxrt.bo.normal, memory_group)
    host = numpy.asarray(device_bo.map())

    for pos in range(globalbuffersize):
        if isInput:
            host[pos] = pos % 256
        else:
            host[pos] = 0

    to_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE
    device_bo.sync(to_device, globalbuffersize, 0)
    return device_bo, host
def runKernel(opt):
    """Run the ``simple`` kernel once and validate its output against a host-side reference.

    Two buffers of ``COUNT`` 32-bit ints are allocated: the first is zeroed,
    the second is filled with its own indices. The kernel is started with both
    buffers and the scalar ``0x10``; afterwards the first buffer is expected
    to hold ``i + i * 16`` at index ``i``.

    Args:
        opt: options object; ``opt.index`` selects the device and
            ``opt.bitstreamFile`` is the xclbin to load.

    Raises:
        AssertionError: if the data read back differs from the reference.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    uuid = d.load_xclbin(xbin)

    COUNT = 1024
    DATA_SIZE = ctypes.sizeof(ctypes.c_int32) * COUNT

    # Instantiate simple
    simple = pyxrt.kernel(d, uuid, "simple")

    print("Allocate and initialize buffers")
    boHandle1 = pyxrt.bo(d, DATA_SIZE, pyxrt.bo.normal, simple.group_id(0))
    boHandle2 = pyxrt.bo(d, DATA_SIZE, pyxrt.bo.normal, simple.group_id(1))
    # Index the mappings in units of int32 so that element i is the i-th
    # integer rather than the i-th byte.
    # NOTE(review): assumes map() exposes the buffer as raw bytes - confirm
    # against the pyxrt bindings in use.
    bo1 = numpy.asarray(boHandle1.map()).view(numpy.int32)
    bo2 = numpy.asarray(boHandle2.map()).view(numpy.int32)

    for i in range(COUNT):
        bo1[i] = 0
        bo2[i] = i

    bufReference = [i + i * 16 for i in range(COUNT)]

    boHandle1.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, DATA_SIZE, 0)
    boHandle2.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, DATA_SIZE, 0)

    print("Start the kernel, simple")
    run = simple(boHandle1, boHandle2, 0x10)

    print("Now wait for the kernel simple to finish")
    run.wait()

    print("Get the output data from the device and validate it")
    boHandle1.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE, DATA_SIZE, 0)
    # `list == ndarray` compares element-wise and yields an array, which cannot
    # be used as a truth value; array_equal reduces it to a single bool.
    # Raised explicitly so the check is not stripped when running with -O.
    if not numpy.array_equal(bufReference, bo1[:COUNT]):
        raise AssertionError("Computed value does not match reference")
def runMemTest(opt, d, mem):
    """Write a test pattern to one memory bank and check that it reads back unchanged.

    Args:
        opt: options object; ``opt.DATA_SIZE`` is the buffer size in bytes.
        d: device the buffer object is created on.
        mem: memory bank descriptor; ``get_index()`` selects the bank and
            ``get_tag()`` names it in messages.

    Raises:
        AssertionError: if the buffer address equals the all-ones sentinel, or
            if the data read back differs from the pattern written.
    """
    print("Testing memory " + mem.get_tag())

    handle = pyxrt.bo(d, opt.DATA_SIZE, pyxrt.bo.normal, mem.get_index())
    invalid_address = 0xffffffffffffffff
    assert (handle.address() != invalid_address), "Illegal physical address for buffer on memory bank " + mem.get_tag()

    pattern = bytearray(b'hello\nthis is Xilinx OpenCL memory read write test\n:-)\n')
    span = len(pattern)

    # Place the pattern at the start of the host mapping and push it out.
    view = handle.map()
    view[:span] = pattern
    handle.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, opt.DATA_SIZE, 0)

    # Wipe the host copy so the final comparison can only pass if the
    # pattern is brought back by the sync from the device.
    blank = bytearray(opt.DATA_SIZE)
    view[:span] = blank[:span]

    handle.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE, opt.DATA_SIZE, 0)
    assert (view[:span] == pattern), "Data migration error on memory bank " + mem.get_tag()