# NOTE(review): the indented statements below are the tail of a function whose
# `def` line lies above this view; the 4-space body indent is assumed - confirm
# against that header.
    # Frequency value obtained from gemx and forwarded to test.test_perf.
    freq = gemx.getFreq()
    test.test_perf(timePointKernel, total_operations,
                   total_parallel_operations, freq, 0, 0, 0)
    # The golden check is skipped only when the largest M, K and N sizes are
    # ALL above 4096 (the three tests are joined with `and`).
    if np.max(m_size) > 4096 and np.max(k_size) > 4096 and np.max(
            n_size) > 4096:
        print("Skip golden comparision because large matrix size")
    else:
        # Only the index-3 result is compared; mat_C[2] is passed in the third
        # position. Presumably C[3] is checked against A[3] x C[2] plus
        # bias[3] - TODO confirm against multiply_and_cmp.
        test.multiply_and_cmp(mat_C[3], mat_A[3], mat_C[2], mat_bias[3],
                              m_size[3], n_size[3], post_scale)


# Script entry point: seeds NumPy, opens the GEMM handle from the parsed
# command-line arguments, runs test_perf_multi_gemm once and prints the gemx
# statistics.
if __name__ == '__main__':
    np.random.seed(123)  # for reproducibility
    # `test` is a module-level name; the function tail above reads it.
    test = GemmTest()
    parser = gemx.processCommandLine()
    args = parser.parse_args()
    gemx.createGEMMHandle(args.xclbin, args.gemxlib, args.device,
                          args.numKernel)
    # Four entries per array, matching the leading 4 passed below; presumably
    # entry i holds the M / K / N dimension of the i-th multiplication -
    # TODO confirm against test_perf_multi_gemm.
    m_size = np.array([512, 512, 2048, 128])
    k_size = np.array([384, 512, 512, 2048])
    n_size = np.array([128, 128, 128, 128])
    test_perf_multi_gemm(4, m_size, k_size, n_size, 32764, 32764,
                         [1, 0])  # run performance measurement
    gemx.printStats()
    # Disabled square-size sweep (256 doubling while below 16384), kept for
    # reference:
    # size = 256
    # while size < 16384:
    #     test_perf(32764, 32764, 0, size, size, size, [1,0])
    #     size = size * 2
# NOTE(review): the indented statements below are the tail of a function whose
# `def` line lies above this view; the 4-space body indent is assumed - confirm
# against that header.
    # gemx.getMat is called on each of the four mat_C entries before the
    # timestamp is recorded.
    gemx.getMat(mat_C[0])
    gemx.getMat(mat_C[1])
    gemx.getMat(mat_C[2])
    gemx.getMat(mat_C[3])
    timePointKernel.append(time.time())  # copy from FPGA
    # Frequency value obtained from gemx and forwarded to test.test_perf.
    freq = gemx.getFreq()
    test.test_perf(timePointKernel, total_operations,
                   total_parallel_operations, freq, 0, 0, 0)
    # Only the index-3 result is compared; mat_C[2] is passed in the third
    # position. Presumably C[3] is checked against A[3] x C[2] plus bias[3] -
    # TODO confirm against multiply_and_cmp.
    test.multiply_and_cmp(mat_C[3], mat_A[3], mat_C[2], mat_bias[3],
                          m_size[3], n_size[3], post_scale)


# Script entry point: seeds NumPy, opens the GEMM handle, runs the
# test_basic_randint sweep for every kernel, one test_multiInstrv1 call, and a
# test_perf_gemm sweep over doubling square sizes.
if __name__ == '__main__':
    np.random.seed(123)  # for reproducibility
    # `test` is a module-level name; the function tail above reads it.
    test = GemmTest()
    args, xclbin_opts = gemx.processCommandLine()
    gemx.createGEMMHandle(args, xclbin_opts)
    # For each kernel index below GEMX_numKernels, call test_basic_randint
    # with every [i, j] pair for i, j in 0..14 (225 calls per kernel).
    for PE in range(int(xclbin_opts["GEMX_numKernels"])):
        for i in range(15):
            for j in range(15):
                test.test_basic_randint(PE, xclbin_opts, [i, j], 1024)
    # Disabled call, with the reason it was disabled:
    # test.test_rand_basic (32764, 0, 5, [1,0])
    # larger matrix size will lead to hw timeout error in regression test
    # NOTE(review): the call on the next line is read as live code rather than
    # part of the comment above - confirm against the original layout.
    test_multiInstrv1(32764, 512, 512, 128, True)
    # Square sizes 256, 512, 1024, 2048, 4096: doubling stops before 8192.
    size = 256
    while size < 8192:
        test_perf_gemm(size, size, size)  # run performance measurement
        size = size * 2