def common_uspmv_dev(rows, cols, datas, m_sizes, k_sizes, nnz_sizes,
                     num_runs, A_handles, B_handles, C_handles):
    """Run a chain of USPMV stages through the device-buffer API and verify it.

    For every stage ``i`` a sparse matrix is built from ``rows[i]``,
    ``cols[i]`` and ``datas[i]`` and registered under ``A_handles[i]``; the
    stage is then queued with ``B_handles[i]`` as its input buffer and
    ``C_handles[i]`` as its output buffer.  Once all stages are queued they
    are executed together, the last output buffer is read back and the
    result is checked by ``test.multiply_and_cmp_uspmv``.

    Args:
        rows, cols, datas: per-stage sequences holding the row indices,
            column indices and values of the sparse matrix entries.
        m_sizes, k_sizes: per-stage matrix dimensions.  Both lists are
            padded IN PLACE, so the caller observes the padded values.
        nnz_sizes: per-stage non-zero counts.
        num_runs: first dimension of every B/C buffer; also passed to each
            queued USPMV operation.
        A_handles, B_handles, C_handles: per-stage device buffer handles.

    Reads the module-level ``xclbin_opts`` for the hardware configuration.
    """
    ddr_width = int(xclbin_opts["GEMX_ddrWidth"])
    # NOTE(review): K is padded to the same multiple as M.  The original code
    # also defined an unused ``min_k = ddrWidth``; confirm which alignment K
    # really needs before changing this.
    alignment = ddr_width * int(xclbin_opts["GEMX_uspmvInterleaves"])
    for i in range(len(m_sizes)):
        m_sizes[i] = test.get_padded_size(m_sizes[i], alignment)
        k_sizes[i] = test.get_padded_size(k_sizes[i], alignment)
    print ("size:",m_sizes,k_sizes,"nnz:",nnz_sizes)

    # Input of the first stage: allocate the device buffer, then copy in the
    # host-side pattern produced by test.fillMod.
    B = gemx.addDevBuf(B_handles[0], num_runs, k_sizes[0], np.float32)
    B_tmp = np.zeros((num_runs, k_sizes[0]), dtype=np.float32)
    test.fillMod(9, num_runs, k_sizes[0], B_tmp)
    B[:] = B_tmp

    C_list = []
    for i in range(len(m_sizes)):
        # Each stage's output buffer is sized with that stage's own M
        # (previously every buffer used m_sizes[-1], which is only right for
        # the final stage).
        C = gemx.addDevBuf(C_handles[i], num_runs, m_sizes[i], np.float32)
        C.fill(0)
        C_list.append(C)
        gemx.addUSpDevBuf(np.array(rows[i]).astype(np.uint16),
                          np.array(cols[i]).astype(np.uint16),
                          np.array(datas[i]),
                          A_handles[i],
                          np.array(m_sizes[i], dtype=np.int32),
                          np.array(k_sizes[i], dtype=np.int32),
                          np.array(nnz_sizes[i], dtype=np.int32),
                          np.array(1, dtype=np.float32),
                          xclbin_opts)
        gemx.sendDevBuf(A_handles[i])
        # NOTE(review): only B_handles[0] is allocated in this function;
        # presumably B_handles[i] for i > 0 names the previous stage's
        # output buffer -- confirm against the caller.
        gemx.sendDevBuf(B_handles[i])
        gemx.sendDevBuf(C_handles[i])
        gemx.addUSPMVDevOp(A_handles[i], B_handles[i], C_handles[i], num_runs)

    # Run every queued stage, then fetch only the final output for checking.
    gemx.executeDev()
    gemx.getDevBuf(C_handles[-1])
    test.multiply_and_cmp_uspmv(rows, cols, datas, m_sizes, k_sizes, B, C_list[-1])
def common_uspmv(rows, cols, datas, m_sizes, k_sizes, nnz_sizes, num_runs, vector_range):
    """Run a chain of USPMV stages with host matrices and verify the result.

    Stage ``i`` multiplies the sparse matrix described by ``rows[i]``,
    ``cols[i]`` and ``datas[i]`` with the output of the previous stage (the
    generated input ``B`` for stage 0) and writes into a fresh zeroed
    matrix.  All stages are queued, executed together, and the last output
    is read back and checked by ``test.multiply_and_cmp_uspmv``.

    Args:
        rows, cols, datas: per-stage sequences holding the row indices,
            column indices and values of the sparse matrix entries.
        m_sizes, k_sizes: per-stage matrix dimensions.  Both lists are
            padded IN PLACE, so the caller observes the padded values.
        nnz_sizes: per-stage non-zero counts.
        num_runs: first dimension of the input and of every output matrix;
            also passed to each queued USPMV operation.
        vector_range: accepted for interface compatibility; not used here.

    Reads the module-level ``xclbin_opts`` for the hardware configuration.
    """
    ddr_width = int(xclbin_opts["GEMX_ddrWidth"])
    # NOTE(review): K is padded to the same multiple as M.  The original code
    # also defined an unused ``min_k = ddrWidth``; confirm which alignment K
    # really needs before changing this.
    alignment = ddr_width * int(xclbin_opts["GEMX_uspmvInterleaves"])
    for i in range(len(m_sizes)):
        m_sizes[i] = test.get_padded_size(m_sizes[i], alignment)
        k_sizes[i] = test.get_padded_size(k_sizes[i], alignment)
    print ("size:",m_sizes,k_sizes,"nnz:",nnz_sizes)

    # B feeds stage 0, so its width must be the FIRST stage's K.  The earlier
    # code indexed k_sizes with the loop variable left over from the padding
    # loop, i.e. it used the last stage's K instead.
    B = np.zeros((num_runs, k_sizes[0]), dtype=np.float32)
    test.fillMod(9, num_runs, k_sizes[0], B)

    # C_list[i] is the input of stage i and C_list[i + 1] its output.
    C_list = [B]
    for i in range(len(m_sizes)):
        C_list.append(np.zeros((num_runs, m_sizes[i]), dtype=np.float32))
        A = gemx.sendUSpMat(np.array(rows[i]).astype(np.uint16),
                            np.array(cols[i]).astype(np.uint16),
                            np.array(datas[i]),
                            np.array(m_sizes[i], dtype=np.int32),
                            np.array(k_sizes[i], dtype=np.int32),
                            np.array(nnz_sizes[i], dtype=np.int32),
                            np.array(1, dtype=np.float32),
                            xclbin_opts)
        gemx.sendMat(C_list[i])
        gemx.sendMat(C_list[i + 1])
        gemx.addUSPMVOp(A, C_list[i], C_list[i + 1], num_runs)

    # Run every queued stage, reset the instruction buffer, then fetch only
    # the final output for checking.
    gemx.execute()
    gemx.clearInstrBuf()
    gemx.getMat(C_list[-1])
    test.multiply_and_cmp_uspmv(rows, cols, datas, m_sizes, k_sizes, B, C_list[-1])
def test_perf_fcn(m, k, n, xclbin_opts, post_scale=[1, 0], A_range=32764, B_range=32764, bias_range=32764):
    """Time a single FCN operation on random data and verify its output.

    ``m``, ``k`` and ``n`` are first padded to multiples of the GEMM block
    sizes taken from ``xclbin_opts``.  When ``GEMX_dataType`` is ``"short"``
    the operands are random int16 matrices with an int32 bias (all zeros if
    ``bias_range`` is 0); otherwise the operands are float32 values drawn
    uniformly from [-128, 128) with a zero float32 bias.

    The timed section covers sending the four matrices, queuing and running
    the op, clearing the instruction buffer and reading the result back.
    The elapsed time is reported through ``test.test_perf`` and the result
    is checked with ``test.multiply_and_cmp``.

    ``post_scale`` is only read (elements 0 and 1), never modified.
    """
    ddrWidth = int(xclbin_opts["GEMX_ddrWidth"])

    def _padded(dim, blocks_key):
        # Round a dimension up to a multiple of (block count * ddrWidth).
        return test.get_padded_size(dim, int(xclbin_opts[blocks_key]) * ddrWidth)

    m = _padded(m, "GEMX_gemmMBlocks")
    k = _padded(k, "GEMX_gemmKBlocks")
    n = _padded(n, "GEMX_gemmNBlocks")

    a_shape, b_shape, c_shape = (m, k), (k, n), (m, n)
    if xclbin_opts["GEMX_dataType"] != "short":
        # Floating-point path: fixed value range, bias is always zero.
        mat_A = np.random.uniform(low=-128, high=128, size=a_shape).astype(np.float32)
        mat_B = np.random.uniform(low=-128, high=128, size=b_shape).astype(np.float32)
        bias = np.zeros(c_shape, dtype=np.float32, order='C')
        C_fpga = np.zeros(c_shape, dtype=np.float32)
    else:
        # Integer path: int16 operands, int32 bias.
        mat_A = np.random.randint(low=-A_range, high=A_range, size=a_shape, dtype=np.int16)
        mat_B = np.random.randint(low=-B_range, high=B_range, size=b_shape, dtype=np.int16)
        if bias_range == 0:
            bias = np.zeros(c_shape, dtype=np.int32, order='C')
        else:
            bias = np.random.randint(low=-bias_range, high=bias_range, size=c_shape, dtype=np.int32)
        C_fpga = np.zeros(c_shape, dtype=np.int16)

    started = time.time()
    for matrix in (mat_A, mat_B, C_fpga, bias):
        gemx.sendMat(matrix)
    gemx.addFCNOp(mat_A, mat_B, C_fpga, bias, post_scale[0], post_scale[1], 1, 0)
    gemx.execute()
    gemx.clearInstrBuf()
    gemx.getMat(C_fpga)
    elapsed = time.time() - started

    # 2*m*n*k for the matrix product plus 3 per output element
    # (presumably bias and post-scale work -- confirm).
    total_operations = 2 * m * n * k + m * n * 3
    test.test_perf(elapsed, total_operations, m, k, n, ddrWidth)
    test.multiply_and_cmp(C_fpga, mat_A, mat_B, bias, m, n, post_scale)
def test_multi_fcn(ins_count, m_size, k_size, n_size, post_scale=[1, 0], A_range=32764, B_range=32764):
    """Queue ``ins_count`` chained FCN operations, run them and check the last.

    Layer 0 computes from a random int16 ``A[0]`` and a random int16 input
    matrix; every later layer ``i`` uses the output of layer ``i - 1`` as
    its second operand.  All biases are zero.  After execution every output
    is read back and the final layer is verified with
    ``test.multiply_and_cmp`` against its actual inputs.

    The previous implementation hard-coded exactly four layers regardless
    of ``ins_count``; behaviour for ``ins_count == 4`` is unchanged.

    Args:
        ins_count: number of chained FCN instructions (must be >= 1).
        m_size, k_size, n_size: per-layer dimensions.  The first
            ``ins_count`` entries of each list are padded IN PLACE.
            Presumably the caller must choose sizes so that each layer's
            output shape is a valid second operand for the next layer --
            this is not checked here.
        post_scale: two-element sequence forwarded to every op; only read.
        A_range, B_range: exclusive magnitude bounds for the random int16
            values of the A matrices and of the first input matrix.

    Raises:
        ValueError: if ``ins_count`` is smaller than 1.

    Reads the module-level ``xclbin_opts`` for the hardware configuration.
    """
    if ins_count < 1:
        raise ValueError("ins_count must be at least 1")

    ddrWidth = int(xclbin_opts["GEMX_ddrWidth"])
    # Alignment multiples are loop-invariant, so look them up once.
    m_align = int(xclbin_opts["GEMX_gemmMBlocks"]) * ddrWidth
    k_align = int(xclbin_opts["GEMX_gemmKBlocks"]) * ddrWidth
    n_align = int(xclbin_opts["GEMX_gemmNBlocks"]) * ddrWidth

    mat_A = []
    mat_C = []
    mat_bias = []
    for i in range(ins_count):
        m_size[i] = test.get_padded_size(m_size[i], m_align)
        k_size[i] = test.get_padded_size(k_size[i], k_align)
        n_size[i] = test.get_padded_size(n_size[i], n_align)
        mat_A.append(
            np.random.randint(low=-A_range, high=A_range,
                              size=(m_size[i], k_size[i]), dtype=np.int16))
        mat_bias.append(np.zeros((m_size[i], n_size[i]), dtype=np.int32))
        mat_C.append(
            np.zeros((m_size[i], n_size[i]), dtype=np.int16, order='C'))
    mat_B0 = np.random.randint(low=-B_range, high=B_range,
                               size=(k_size[0], n_size[0]), dtype=np.int16)

    for i in range(ins_count):
        gemx.sendMat(mat_A[i])
        gemx.sendMat(mat_C[i])
        gemx.sendMat(mat_bias[i])
    gemx.sendMat(mat_B0)

    # layer_inputs[i] is the second operand of layer i: the random matrix
    # for the first layer, the previous layer's output afterwards.
    layer_inputs = [mat_B0] + mat_C[:-1]
    for i in range(ins_count):
        gemx.addFCNOp(mat_A[i], layer_inputs[i], mat_C[i], mat_bias[i],
                      post_scale[0], post_scale[1], 1, 0)

    gemx.execute()
    gemx.clearInstrBuf()
    for output in mat_C:
        gemx.getMat(output)

    # Index by ins_count rather than -1: the size lists may be longer than
    # the number of layers actually built.
    last = ins_count - 1
    test.multiply_and_cmp(mat_C[last], mat_A[last], layer_inputs[last],
                          mat_bias[last], m_size[last], n_size[last], post_scale)