예제 #1
0
def runTest(opt):
    """Run the memory test on every in-use memory entry of the xclbin.

    Opens the device selected by ``opt.index``, loads the xclbin file named
    by ``opt.bitstreamFile`` onto it, and calls ``runMemTest(opt, d, m)``
    once for each entry of ``xbin.get_mems()`` whose ``get_used()`` is true.

    Args:
        opt: Options object providing ``index`` and ``bitstreamFile``.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    # The UUID returned by load_xclbin() is not needed by this function.
    d.load_xclbin(xbin)

    for m in xbin.get_mems():
        # Entries that report themselves as unused are skipped.
        if m.get_used():
            runMemTest(opt, d, m)
예제 #2
0
def runKernel(opt):
    """Run the "vectorswizzle" kernel over a buffer and validate the output.

    The buffer holds ``elem_num`` C ints initialised to 0, 1, 2, ...  The
    golden data expects, within every group of four consecutive ints, the
    first two values to come back increased by 2 and the last two decreased
    by 2.  The kernel is started once per work group on a sub-buffer of the
    parent buffer, and the parent buffer is then read back and compared.

    Args:
        opt: Options object providing ``index`` and ``bitstreamFile``.

    Returns:
        0 when the read-back data matches the golden data.

    Raises:
        AssertionError: If the read-back data differs from the golden data.
    """
    d = pyxrt.device(opt.index)
    uuid = d.load_xclbin(opt.bitstreamFile)
    # Instantiate vectorswizzle
    swizzle = pyxrt.kernel(d, uuid, "vectorswizzle")

    elem_num = 4096
    int_bytes = ctypes.sizeof(ctypes.c_int)
    size = int_bytes * elem_num

    obj = pyxrt.bo(d, size, pyxrt.bo.normal, swizzle.group_id(0))
    # Reinterpret the mapping as C ints so that index i addresses the i-th
    # int of the buffer.
    # NOTE(review): assumes map() exposes the buffer as raw bytes (the view
    # is a no-op if it already yields C ints) -- confirm against pyxrt.
    buf = numpy.asarray(obj.map()).view(numpy.intc)

    # Input data and golden values
    indices = numpy.arange(elem_num, dtype=numpy.intc)
    buf[:elem_num] = indices
    reference = numpy.where(indices % 4 < 2, indices + 2, indices - 2)

    obj.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE, size, 0)

    # Create a run object without starting kernel
    run = pyxrt.run(swizzle)

    # An int4 vector is four ints.  The global range is the number of int4
    # vectors in the buffer; each work group covers local_dim[0] of them, so
    # group_size * local_size_bytes == size and every sub-buffer created
    # below lies inside the parent buffer.
    vec_bytes = int_bytes * 4
    global_dim = [size // vec_bytes, 0]
    local_dim = [16, 0]
    group_size = global_dim[0] // local_dim[0]

    # Run swizzle with 16 (local_dim[0]) int4 elements at a time, using a
    # sub-buffer to offset the kernel argument within the parent buffer.
    local_size_bytes = local_dim[0] * vec_bytes
    for group in range(group_size):
        subobj = pyxrt.bo(obj, local_size_bytes, local_size_bytes * group)
        run.set_arg(0, subobj)
        run.start()
        # NOTE(review): the state returned by wait() is not inspected, so a
        # run that does not complete is only detected by the data compare.
        run.wait(5)

    obj.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE, size, 0)

    print("Compare the FPGA results with golden data")
    assert numpy.array_equal(buf[:elem_num], reference)

    return 0
예제 #3
0
def runKernel(opt):
    """Start the "hello" kernel twice on separate buffers and check both.

    The kernel is the first one in the xclbin whose name matches the
    pattern ``hello*``.  Two zero-filled buffers of ``opt.DATA_SIZE`` bytes
    are synced to the device, one run is issued per buffer, and after both
    runs have been waited on the buffers are synced back and their leading
    bytes are compared against ``b'Hello World'``.

    Args:
        opt: Options object providing ``index``, ``bitstreamFile`` and
            ``DATA_SIZE``.

    Raises:
        IndexError: If no kernel name matches the pattern.
        AssertionError: If either buffer does not start with the expected
            bytes.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    uuid = d.load_xclbin(xbin)

    # NOTE(review): as a regular expression this matches "hell" followed by
    # any number of "o" characters at the start of the kernel name.
    rule = re.compile("hello*")
    candidates = [k for k in xbin.get_kernels() if rule.match(k.get_name())]
    kernel = candidates[0]
    hello = pyxrt.kernel(d, uuid, kernel.get_name(), pyxrt.kernel.shared)

    # Allocate two zero-initialised buffers and keep a mapping of each.
    zeros = bytearray(opt.DATA_SIZE)
    handles = []
    views = []
    for _ in range(2):
        handle = pyxrt.bo(d, opt.DATA_SIZE, pyxrt.bo.normal,
                          hello.group_id(0))
        handle.write(zeros, 0)
        handles.append(handle)
        views.append(handle.map())

    to_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE
    for handle in handles:
        handle.sync(to_device, opt.DATA_SIZE, 0)

    for view in views:
        print("Original string = [%s]" % view[:64].tobytes())

    print("Issue kernel start requests")
    runs = [hello(handle) for handle in handles]

    print("Now wait for the kernels to finish using xrtRunWait()")
    for pending in runs:
        pending.wait(5)

    print("Get the output data produced by the 2 kernel runs from the device")
    from_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE
    for handle in handles:
        handle.sync(from_device, opt.DATA_SIZE, 0)

    golden = memoryview(b'Hello World')
    results = [view[:len(golden)] for view in views]
    for result in results:
        print("Result string = [%s]" % result.tobytes())
    for result in results:
        assert (result == golden), "Incorrect output from kernel"
예제 #4
0
파일: main.py 프로젝트: stsoe/XRT
def runKernel(opt):
    """Run the "simple" kernel on two buffers and validate the first one.

    Buffer 1 is initialised to zeros and buffer 2 to 0, 1, 2, ...  After the
    kernel has run with the scalar argument ``0x10``, buffer 1 is read back
    and element ``i`` is expected to equal ``i + i * 16``.

    Args:
        opt: Options object providing ``index`` and ``bitstreamFile``.

    Raises:
        AssertionError: If the data read back differs from the reference.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    uuid = d.load_xclbin(xbin)

    COUNT = 1024
    DATA_SIZE = ctypes.sizeof(ctypes.c_int32) * COUNT

    # Instantiate simple
    simple = pyxrt.kernel(d, uuid, "simple")

    print("Allocate and initialize buffers")
    boHandle1 = pyxrt.bo(d, DATA_SIZE, pyxrt.bo.normal, simple.group_id(0))
    boHandle2 = pyxrt.bo(d, DATA_SIZE, pyxrt.bo.normal, simple.group_id(1))
    # Reinterpret the mappings as 32-bit ints so that index i addresses the
    # i-th int of each buffer.
    # NOTE(review): assumes map() exposes the buffer as raw bytes (the view
    # is a no-op if it already yields int32) -- confirm against pyxrt.
    bo1 = numpy.asarray(boHandle1.map()).view(numpy.int32)
    bo2 = numpy.asarray(boHandle2.map()).view(numpy.int32)

    bo1[:COUNT] = 0
    bo2[:COUNT] = numpy.arange(COUNT, dtype=numpy.int32)

    bufReference = [i + i * 16 for i in range(COUNT)]

    boHandle1.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE,
                   DATA_SIZE, 0)
    boHandle2.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE,
                   DATA_SIZE, 0)

    print("Start the kernel, simple")
    run = simple(boHandle1, boHandle2, 0x10)
    print("Now wait for the kernel simple to finish")
    run.wait()

    print("Get the output data from the device and validate it")
    boHandle1.sync(pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE,
                   DATA_SIZE, 0)
    # Comparing a list with an ndarray using == yields an element-wise
    # array whose truth value is ambiguous; array_equal gives one boolean.
    assert numpy.array_equal(bufReference, bo1[:COUNT]), \
        "Computed value does not match reference"
예제 #5
0
def main():
    """Validate the options, start the display threads and handle key input.

    The refresh rate taken from ``opt.s`` must lie in the range 1..60 and
    the terminal must be at least 100 columns by 44 lines.  Inside an
    ``XBUtil.Terminal`` context the footer is printed and three daemon
    threads are started (``running_clock``, ``running_counter`` and
    ``running_reports``).  The calling thread then consumes key presses:

    * ``q`` / ``Q`` -- leave the loop
    * ``n`` / ``N`` -- increment ``g_report_number``
    * ``p`` / ``P`` -- decrement ``g_report_number``
    * ``+`` / ``-`` -- hidden options; raise or lower ``g_refresh_rate`` by
      one, clamped to the range 1..60

    Raises:
        RuntimeError: If the refresh rate is out of range or the terminal
            is too small.
    """
    global g_refresh_rate
    global g_report_number

    # Get and validate the options
    opt = options_parser()

    g_refresh_rate = float(opt.s)

    if g_refresh_rate < 1:
        raise RuntimeError("Please specify a refresh rate greater than 1 second")

    if g_refresh_rate > 60:
        raise RuntimeError("Please specify a refresh rate less than 60 seconds")

    dev = pyxrt.device(opt.bdf)

    # Check the terminal size
    term_size = get_terminal_size()
    x_len = term_size.columns
    y_len = term_size.lines
    MIN_X = 100
    MIN_Y = 44
    if x_len < MIN_X or y_len < MIN_Y:
        raise RuntimeError("Please resize the terminal window.  The current size %dx%d is smaller then the required size of %dx%d" % (x_len, y_len, MIN_X, MIN_Y))

    with XBUtil.Terminal() as term:
        term.hide_cursor(True)

        # Lock handed to the footer printer and to every worker thread
        lock = threading.Lock()

        # Print the key
        print_footer(term, lock, y_len)

        # Clock, counter and report workers, started in this order.  They
        # are daemon threads, so they do not keep the process alive.
        workers = (
            (running_clock, (term, lock)),
            (running_counter, (term, lock, x_len)),
            (running_reports, (term, lock, dev, x_len)),
        )
        for target, args in workers:
            worker = threading.Thread(target=target, args=args)
            worker.daemon = True
            worker.start()

        # Main thread that consumes the keys pressed by the user.
        while True:
            key = XBUtil.get_char()
            if key in ('q', 'Q'):
                break

            if key in ('n', 'N'):
                g_report_number += 1
            elif key in ('p', 'P'):
                g_report_number -= 1
            elif key == '+':             # Hidden option
                # Single clamped store: the global never holds a value
                # outside 1..60, even momentarily.
                g_refresh_rate = min(g_refresh_rate + 1, 60)
            elif key == '-':             # Hidden option
                g_refresh_rate = max(g_refresh_rate - 1, 1)
예제 #6
0
def runKernel(opt):
    """Measure the throughput of the "bandwidth1"/"bandwidth2" kernels.

    For each burst length (``beats`` starts at 16 and is multiplied by 4
    while it does not exceed 1024) both kernels are run repeatedly, with the
    repetition count doubling from 64, until one timed pass lasts at least
    five seconds.  After every pass the first ``limit`` entries of each
    output buffer are synced back and compared with the matching input
    buffer.  One throughput figure is recorded per burst length.

    Args:
        opt: Options object providing ``index`` and ``bitstreamFile``.

    Raises:
        RuntimeError: If an output buffer differs from its input buffer, or
            if the best throughput is below the value returned by
            ``getThreshold``.
    """
    d = pyxrt.device(opt.index)
    xbin = pyxrt.xclbin(opt.bitstreamFile)
    uuid = d.load_xclbin(xbin)

    khandle1 = pyxrt.kernel(d, uuid, "bandwidth1", pyxrt.kernel.shared)
    khandle2 = pyxrt.kernel(d, uuid, "bandwidth2", pyxrt.kernel.shared)

    output_bo1, output_buf1 = getInputOutputBuffer(d, khandle1, 0, False)
    output_bo2, output_buf2 = getInputOutputBuffer(d, khandle2, 0, False)
    input_bo1, input_buf1 = getInputOutputBuffer(d, khandle1, 1, True)
    input_bo2, input_buf2 = getInputOutputBuffer(d, khandle2, 1, True)

    TYPESIZE = 512
    threshold = getThreshold(d)

    bytes_per_beat = int(TYPESIZE / 8)
    min_duration_us = 5*1000000
    from_device = pyxrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE
    buffer_pairs = ((input_buf1, output_buf1), (input_buf2, output_buf2))

    throughput = []
    failed = False
    test = 0
    beats = 16
    while beats <= 1024 and not failed:
        print("LOOP PIPELINE %d beats" %beats)

        usduration = 0
        reps = 64
        while usduration < min_duration_us:
            # Time one pass of both kernels running side by side.
            start = current_micro_time()
            rhandle1 = khandle1(output_bo1, input_bo1, beats, reps)
            rhandle2 = khandle2(output_bo2, input_bo2, beats, reps)
            rhandle1.wait()
            rhandle2.wait()
            usduration = current_micro_time() - start

            limit = beats * bytes_per_beat
            output_bo1.sync(from_device, limit, 0)
            output_bo2.sync(from_device, limit, 0)

            # Stop at the first output buffer that differs from its input.
            for source, copied in buffer_pairs:
                failed = (source[:limit] != copied[:limit])
                if failed:
                    break
            if failed:
                break

            # Pass was too short to measure: double the work and retry.
            if usduration < min_duration_us:
                reps = reps*2

        # Statistics are recorded even when the compare failed; the outer
        # loop condition then ends the test.
        seconds = usduration/1000000.0
        total_bytes = reps*beats*bytes_per_beat
        bytes_per_sec = 2.0*total_bytes/seconds
        throughput.append(2.0*bytes_per_sec/(1024 * 1024))
        print("Test %d, Throughput: %d MB/s" %(test, throughput[test]))
        beats = beats*4
        test += 1

    if failed:
        raise RuntimeError("ERROR: Failed to copy entries")

    print("TTTT: %d" %throughput[0])
    best = max(throughput)
    print("Maximum throughput: %d MB/s" %best)
    if best < threshold:
        raise RuntimeError("ERROR: Throughput is less than expected value of %d GB/sec" %(threshold/1000))