예제 #1
0
    timePointKernel.append(time.time())  # copy from FPGA
    freq = gemx.getFreq()
    test.test_perf(timePointKernel, total_operations,
                   total_parallel_operations, freq, 0, 0, 0)
    if np.max(m_size) > 4096 and np.max(k_size) > 4096 and np.max(
            n_size) > 4096:
        print("Skip golden comparision because large matrix size")
    else:
        test.multiply_and_cmp(mat_C[3], mat_A[3], mat_C[2], mat_bias[3],
                              m_size[3], n_size[3], post_scale)


if __name__ == '__main__':
    # Script entry point: seed the RNG, build the GEMM handle from the
    # command-line options, run one multi-GEMM performance measurement and
    # print the library statistics.
    np.random.seed(123)  # for reproducibility
    # `test` is a module-level name; the function code earlier in this file
    # calls test.test_perf(...) and test.multiply_and_cmp(...) through it.
    test = GemmTest()
    parser = gemx.processCommandLine()
    args = parser.parse_args()

    gemx.createGEMMHandle(args.xclbin, args.gemxlib, args.device,
                          args.numKernel)
    # One entry per matrix multiply in the run (four entries each).
    # NOTE(review): k_size[i] equals m_size[i - 1] for i = 1..3, which is
    # consistent with each result feeding the next multiply -- confirm
    # against test_perf_multi_gemm.
    m_size = np.array([512, 512, 2048, 128])
    k_size = np.array([384, 512, 512, 2048])
    n_size = np.array([128, 128, 128, 128])
    # NOTE(review): the first argument (4) presumably is the number of GEMM
    # operations, the two 32764 values presumably bound the random input
    # data, and [1, 0] presumably is the post-scale pair -- the callee's
    # signature is not visible here, so verify before relying on this.
    test_perf_multi_gemm(4, m_size, k_size, n_size, 32764, 32764,
                         [1, 0])  # run performance measurement
    gemx.printStats()

#    size = 256
#    while size < 16384:
#        test_perf(32764, 32764, 0, size, size, size, [1,0])
#        size = size * 2
예제 #2
0
파일: test_fcn.py 프로젝트: liujieuw/gemx
    gemx.addFCNOp(mat_A[3], mat_C[2], mat_C[3], mat_bias[3], post_scale[0],
                  post_scale[1], 1, 0)
    gemx.execute()
    gemx.clearInstrBuf()
    gemx.getMat(mat_C[0])
    gemx.getMat(mat_C[1])
    gemx.getMat(mat_C[2])
    gemx.getMat(mat_C[3])
    test.multiply_and_cmp(mat_C[3], mat_A[3], mat_C[2], mat_bias[3], m_size[3],
                          n_size[3], post_scale)


if __name__ == '__main__':
    np.random.seed(123)  # for reproducibility
    test = FcnTest()
    args, xclbin_opts = gemx.processCommandLine()
    gemx.createFCNHandle(args, xclbin_opts)
    if xclbin_opts["GEMX_dataType"] == "short":
        for j in range(1, 3):
            for k in range(1, 8):
                for i in range(int(xclbin_opts["GEMX_numKernels"])):
                    for m, n in ([0, 0], [1, 0]):
                        test.test_basic_randint(i, xclbin_opts, [j, k], [m, n],
                                                2048)

        test.test_basic_size(512, 512, 512, xclbin_opts)

        size = 256
        while size < 8192:
            test_perf_fcn(size, size, size,
                          xclbin_opts)  # run performance measurement