def runTest(opt):
    """Load the xclbin on the selected device and test each used memory.

    Args:
        opt: Parsed options; ``opt.index`` selects the device and
            ``opt.bitstreamFile`` names the xclbin to load. It is also
            forwarded unchanged to ``runMemTest``.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    # The returned UUID is not needed here; the call is made only to load
    # the xclbin onto the device before the memories are exercised.
    d.load_xclbin(xbin)

    for m in xbin.get_mems():
        # Skip memories the xclbin reports as unused.
        if not m.get_used():
            continue
        runMemTest(opt, d, m)
def runKernel(opt):
    """Run the ``vectorswizzle`` kernel group by group and verify the output.

    A buffer of 4096 C ints is filled with 0..4095, pushed to the device and
    processed in place by the kernel, one sub-buffer (16 int4 vectors) per
    run. The result is then read back and compared against the expected
    swizzle: within every group of four ints, the first two and the last two
    trade places.

    Args:
        opt: Parsed options; ``opt.index`` selects the device and
            ``opt.bitstreamFile`` is the xclbin to load.

    Returns:
        0 when every element matches the golden data.

    Raises:
        AssertionError: If any element read back differs from the reference.
    """
    d = pyxrt.device(opt.index)
    uuid = d.load_xclbin(opt.bitstreamFile)

    # Instantiate vectorswizzle
    swizzle = pyxrt.kernel(d, uuid, "vectorswizzle")

    elem_num = 4096
    vector_len = 4  # ints per int4 vector
    int_size = ctypes.sizeof(ctypes.c_int)
    size = int_size * elem_num

    obj = pyxrt.bo(d, size, pyxrt.bo.normal, swizzle.group_id(0))
    # NOTE(review): bo.map() is understood to expose a byte-oriented view of
    # the buffer - confirm against the pyxrt bindings. Reinterpreting it as C
    # ints makes buf[idx] address the idx-th int; the view() is a no-op if the
    # mapping already has that element type.
    buf = numpy.asarray(obj.map()).view(numpy.intc)

    # Compute golden values: positions 0/1 of each group of four receive the
    # values from positions 2/3 and vice versa.
    reference = []
    for idx in range(elem_num):
        buf[idx] = idx
        reference.append(idx + 2 if idx % vector_len < 2 else idx - 2)

    obj.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, size, 0)

    # Create a run object without starting kernel
    run = pyxrt.run(swizzle)

    # The global range is the number of int4 vectors, i.e. derived from the
    # element count. Deriving it from the byte size made the loop below issue
    # four times too many groups and place sub-buffers past the parent's end.
    global_dim = [elem_num // vector_len, 0]  # int4 vector count global range
    local_dim = [16, 0]  # int4 vectors processed per run
    group_size = global_dim[0] // local_dim[0]

    # Run swizzle with 16 (local[0]) elements at a time
    # Each element is an int4 (sizeof(int) * 4 bytes)
    # Create sub buffer to offset kernel argument in parent buffer
    local_size_bytes = local_dim[0] * int_size * vector_len
    for group in range(group_size):
        subobj = pyxrt.bo(obj, local_size_bytes, local_size_bytes * group)
        run.set_arg(0, subobj)
        run.start()
        run.wait(5)

    obj.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE, size, 0)

    print("Compare the FPGA results with golden data")
    for idx in range(elem_num):
        assert buf[idx] == reference[idx], (
            "Mismatch at index %d: got %d, expected %d"
            % (idx, buf[idx], reference[idx]))

    return 0
def runKernel(opt):
    """Launch the hello kernel twice concurrently and check both outputs.

    The first kernel in the xclbin whose name matches the ``hello*`` pattern
    is opened in shared mode. Two zero-filled buffers of ``opt.DATA_SIZE``
    bytes are synced to the device, one run is started per buffer, and after
    both runs are waited on the leading bytes of each buffer are compared
    with ``b'Hello World'``.

    Args:
        opt: Parsed options providing ``index``, ``bitstreamFile`` and
            ``DATA_SIZE``.

    Raises:
        AssertionError: If either buffer does not start with the expected
            greeting.
        IndexError: If no kernel name in the xclbin matches the pattern.
    """
    device = pyxrt.device(opt.index)
    xclbin = pyxrt.xclbin(opt.bitstreamFile)
    xclbin_uuid = device.load_xclbin(xclbin)

    # Pick the first kernel whose name matches the pattern.
    pattern = re.compile("hello*")
    candidates = [k for k in xclbin.get_kernels()
                  if pattern.match(k.get_name())]
    hello = pyxrt.kernel(device, xclbin_uuid, candidates[0].get_name(),
                         pyxrt.kernel.shared)

    to_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE
    from_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE

    # Allocate, zero and map the two buffers, one after the other.
    zeros = bytearray(opt.DATA_SIZE)
    handles = []
    views = []
    for _ in range(2):
        handle = pyxrt.bo(device, opt.DATA_SIZE, pyxrt.bo.normal,
                          hello.group_id(0))
        handle.write(zeros, 0)
        views.append(handle.map())
        handles.append(handle)

    for handle in handles:
        handle.sync(to_device, opt.DATA_SIZE, 0)

    for view in views:
        print("Original string = [%s]" % view[:64].tobytes())

    print("Issue kernel start requests")
    runs = [hello(handle) for handle in handles]

    print("Now wait for the kernels to finish using xrtRunWait()")
    for run in runs:
        run.wait(5)

    print("Get the output data produced by the 2 kernel runs from the device")
    for handle in handles:
        handle.sync(from_device, opt.DATA_SIZE, 0)

    golden = memoryview(b'Hello World')
    results = [view[:len(golden)] for view in views]

    for result in results:
        print("Result string = [%s]" % result.tobytes())

    for result in results:
        assert (result == golden), "Incorrect output from kernel"
def runKernel(opt):
    """Run the ``simple`` kernel once and validate its output buffer.

    Two buffers of 1024 32-bit integers are allocated: the first is zeroed,
    the second holds 0..1023. The kernel is started with both buffers and the
    scalar ``0x10``; afterwards the first buffer is read back and must equal
    ``i + i * 16`` at every index ``i``.

    Args:
        opt: Parsed options; ``opt.index`` selects the device and
            ``opt.bitstreamFile`` names the xclbin to load.

    Raises:
        AssertionError: If the data read back differs from the reference.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    uuid = d.load_xclbin(xbin)

    COUNT = 1024
    DATA_SIZE = ctypes.sizeof(ctypes.c_int32) * COUNT

    # Instantiate simple
    simple = pyxrt.kernel(d, uuid, "simple")

    print("Allocate and initialize buffers")
    boHandle1 = pyxrt.bo(d, DATA_SIZE, pyxrt.bo.normal, simple.group_id(0))
    boHandle2 = pyxrt.bo(d, DATA_SIZE, pyxrt.bo.normal, simple.group_id(1))

    # NOTE(review): bo.map() is understood to expose a byte-oriented view of
    # the buffer - confirm against the pyxrt bindings. Viewing it as int32
    # makes index i address the i-th 32-bit element; the view() is a no-op if
    # the mapping already has that element type.
    bo1 = numpy.asarray(boHandle1.map()).view(numpy.int32)
    bo2 = numpy.asarray(boHandle2.map()).view(numpy.int32)

    bo1[:COUNT] = 0
    bo2[:COUNT] = numpy.arange(COUNT, dtype=numpy.int32)

    bufReference = [i + i * 16 for i in range(COUNT)]

    boHandle1.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, DATA_SIZE, 0)
    boHandle2.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, DATA_SIZE, 0)

    print("Start the kernel, simple")
    run = simple(boHandle1, boHandle2, 0x10)
    print("Now wait for the kernel simple to finish")
    run.wait()

    print("Get the output data from the device and validate it")
    boHandle1.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE, DATA_SIZE, 0)

    # `list == ndarray` yields an element-wise boolean array whose truth
    # value is ambiguous, so the comparison has to be reduced explicitly.
    assert numpy.array_equal(bo1[:COUNT], bufReference), \
        "Computed value does not match reference"
def main():
    """Validate options and terminal size, start the display threads and
    process key presses until the user quits.

    Module globals written here:
        g_refresh_rate: Initialised from the ``s`` option; adjusted by the
            hidden ``+`` / ``-`` keys and kept within 1..60.
        g_report_number: Incremented by ``n``/``N`` and decremented by
            ``p``/``P``.

    Raises:
        RuntimeError: If the requested refresh rate is outside 1..60 or the
            terminal is smaller than 100x44.
    """
    global g_refresh_rate
    global g_report_number

    # Get and validate the options
    opt = options_parser()

    g_refresh_rate = float(opt.s)
    if g_refresh_rate < 1:
        raise RuntimeError("Please specify a refresh rate greater than 1 second")
    if g_refresh_rate > 60:
        raise RuntimeError("Please specify a refresh rate less than 60 seconds")

    dev = pyxrt.device(opt.bdf)

    # Check the terminal size
    term_size = get_terminal_size()
    x_len = term_size.columns
    y_len = term_size.lines
    MIN_X = 100
    MIN_Y = 44
    if x_len < MIN_X or y_len < MIN_Y:
        raise RuntimeError("Please resize the terminal window.  The current size %dx%d is smaller than the required size of %dx%d" % (x_len, y_len, MIN_X, MIN_Y))

    with XBUtil.Terminal() as term:
        term.hide_cursor(True)

        # All workers share one lock, handed to each of them together with
        # the terminal.
        lock = threading.Lock()

        # Print the key
        print_footer(term, lock, y_len)

        # Clock, counter and report workers, started in that order. They are
        # daemon threads so they do not keep the process alive once the key
        # loop below returns.
        workers = (
            (running_clock, (term, lock)),
            (running_counter, (term, lock, x_len)),
            (running_reports, (term, lock, dev, x_len)),
        )
        for target, args in workers:
            worker = threading.Thread(target=target, args=args)
            worker.daemon = True
            worker.start()

        # Main thread that consumes the keys pressed by the user.
        while True:
            key = XBUtil.get_char()
            if key in ('q', 'Q'):
                break
            elif key in ('n', 'N'):
                g_report_number += 1
            elif key in ('p', 'P'):
                g_report_number -= 1
            elif key == '+':  # Hidden option
                # Clamp in a single assignment so the global never holds an
                # out-of-range value that another thread could read.
                g_refresh_rate = min(g_refresh_rate + 1, 60)
            elif key == '-':  # Hidden option
                g_refresh_rate = max(g_refresh_rate - 1, 1)
def runKernel(opt):
    """Measure copy throughput of the two bandwidth kernels.

    For burst lengths of 16, 64, 256 and 1024 beats, both kernels are run
    back to back with a doubling repetition count until one timed pass takes
    at least five seconds. After every pass the first ``beats * 64`` bytes of
    each output buffer are synced from the device and compared with the
    matching input buffer. The throughput derived from the last pass of each
    burst length is printed, and the best one is checked against the
    threshold returned by ``getThreshold``.

    Args:
        opt: Parsed options; ``opt.index`` selects the device and
            ``opt.bitstreamFile`` names the xclbin to load.

    Raises:
        RuntimeError: If an output buffer differs from its input buffer, or
            if the maximum throughput is below the threshold.
    """
    device = pyxrt.device(opt.index)
    xclbin = pyxrt.xclbin(opt.bitstreamFile)
    xclbin_uuid = device.load_xclbin(xclbin)

    kernel_a = pyxrt.kernel(device, xclbin_uuid, "bandwidth1", pyxrt.kernel.shared)
    kernel_b = pyxrt.kernel(device, xclbin_uuid, "bandwidth2", pyxrt.kernel.shared)

    out_bo_a, out_buf_a = getInputOutputBuffer(device, kernel_a, 0, False)
    out_bo_b, out_buf_b = getInputOutputBuffer(device, kernel_b, 0, False)
    in_bo_a, in_buf_a = getInputOutputBuffer(device, kernel_a, 1, True)
    in_bo_b, in_buf_b = getInputOutputBuffer(device, kernel_b, 1, True)

    TYPESIZE = 512
    bytes_per_beat = int(TYPESIZE / 8)
    threshold = getThreshold(device)

    from_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE
    five_seconds = 5 * 1000000  # microseconds

    throughput = []
    failed = False

    # Burst length starts at 16 beats and quadruples up to 1024.
    for test, beats in enumerate((16, 64, 256, 1024)):
        print("LOOP PIPELINE %d beats" %beats)

        limit = beats * bytes_per_beat
        elapsed_us = 0
        reps = 64
        while elapsed_us < five_seconds:
            started = current_micro_time()
            run_a = kernel_a(out_bo_a, in_bo_a, beats, reps)
            run_b = kernel_b(out_bo_b, in_bo_b, beats, reps)
            run_a.wait()
            run_b.wait()
            elapsed_us = current_micro_time() - started

            out_bo_a.sync(from_device, limit, 0)
            out_bo_b.sync(from_device, limit, 0)

            failed = (in_buf_a[:limit] != out_buf_a[:limit])
            if failed:
                break
            failed = (in_buf_b[:limit] != out_buf_b[:limit])
            if failed:
                break

            # Too short to measure: retry with twice the repetitions.
            if elapsed_us < five_seconds:
                reps = reps * 2

        # Figures for the last pass at this burst length.
        elapsed_s = elapsed_us / 1000000.0
        total_bytes = reps * beats * bytes_per_beat
        bytes_per_sec = 2.0 * total_bytes / elapsed_s
        throughput.append(2.0 * bytes_per_sec / (1024 * 1024))
        print("Test %d, Throughput: %d MB/s" %(test, throughput[test]))

        if failed:
            break

    if failed:
        raise RuntimeError("ERROR: Failed to copy entries")

    print("TTTT: %d" %throughput[0])
    best = max(throughput)
    print("Maximum throughput: %d MB/s" %best)
    if best < threshold:
        raise RuntimeError("ERROR: Throughput is less than expected value of %d GB/sec" %(threshold/1000))