def common_uspmv(rows, cols, datas, m_sizes, k_sizes, nnz_sizes, num_runs, vector_range):
    """Queue a chain of USPMV operations on the device and check the result.

    Each layer ``i`` is described by ``rows[i]``, ``cols[i]``, ``datas[i]``,
    ``m_sizes[i]``, ``k_sizes[i]`` and ``nnz_sizes[i]``. The output buffer of
    layer ``i`` is used as the input buffer of layer ``i + 1``.

    Parameters
    ----------
    rows, cols, datas : sequence
        Per-layer row indices, column indices and values of the sparse
        matrices. Indices are converted to ``uint16`` before being sent.
    m_sizes, k_sizes : list of int
        Per-layer dimensions. NOTE: both lists are padded IN PLACE to a
        multiple of ``GEMX_ddrWidth * GEMX_uspmvInterleaves``.
    nnz_sizes : sequence of int
        Per-layer number of non-zero entries.
    num_runs : int
        Number of rows of the dense input/output matrices; also passed to
        ``gemx.addUSPMVOp``.
    vector_range
        Unused by this function; kept for interface compatibility.
    """
    ddrWidth = int(xclbin_opts["GEMX_ddrWidth"])
    min_m = ddrWidth * int(xclbin_opts["GEMX_uspmvInterleaves"])

    # Pad every layer's dimensions in place (the caller's lists are modified).
    for i in range(len(m_sizes)):
        m_sizes[i] = test.get_padded_size(m_sizes[i], min_m)
        k_sizes[i] = test.get_padded_size(k_sizes[i], min_m)
    print ("size:",m_sizes,k_sizes,"nnz:",nnz_sizes)

    # B is the input of the FIRST layer, so its width is k_sizes[0]. Index 0
    # is used explicitly instead of the index left over from the padding loop,
    # which would select the last layer's width.
    B = np.zeros((num_runs, k_sizes[0]), dtype=np.float32)
    test.fillMod(9, num_runs, k_sizes[0], B)
    B = B.astype(np.float32)

    # C_list[i] is the input of layer i and C_list[i + 1] its output.
    C_list = [B]
    for i in range(len(m_sizes)):
        C = np.zeros((num_runs, m_sizes[i]), dtype=np.float32)
        C_list.append(C)
        A = gemx.sendUSpMat(np.array(rows[i]).astype(np.uint16),
                            np.array(cols[i]).astype(np.uint16),
                            np.array(datas[i]),
                            np.array(m_sizes[i], dtype=np.int32),
                            np.array(k_sizes[i], dtype=np.int32),
                            np.array(nnz_sizes[i], dtype=np.int32),
                            np.array(1, dtype=np.float32),
                            xclbin_opts)
        gemx.sendMat(C_list[i])
        gemx.sendMat(C_list[i + 1])
        gemx.addUSPMVOp(A, C_list[i], C_list[i + 1], num_runs)

    gemx.execute()
    gemx.clearInstrBuf()
    gemx.getMat(C_list[-1])
    # Compare the device result of the last layer against the reference.
    test.multiply_and_cmp_uspmv(rows, cols, datas, m_sizes, k_sizes, B, C_list[-1])
def predict(self, inp):
    """Run a single USPMV operation on ``inp`` and return the cropped result.

    ``self.out_dim`` is updated so that its first entry equals the number of
    rows of the incoming matrix; the second entry is left unchanged. The
    matrix is passed through ``self.format_for_fpga(inp, 1, 1)``, sent to the
    device together with a zero-initialised output buffer, and multiplied
    with ``self.A_list[0]``.

    Parameters
    ----------
    inp: array
        input matrix

    Return
    ------
    array
        the output buffer sliced to ``self.out_dim`` rows and columns
    """
    # Remember the caller-visible row count before the input is reformatted.
    self.out_dim = (inp.shape[0], self.out_dim[1])

    formatted = self.format_for_fpga(inp, 1, 1)
    batch = formatted.shape[0]

    dense_in = formatted.astype(np.float32)
    gemx.sendMat(dense_in)

    dense_out = np.zeros((batch, self.sizes[0][0]), dtype=np.float32)
    gemx.sendMat(dense_out)

    gemx.addUSPMVOp(self.A_list[0], dense_in, dense_out, batch)
    gemx.execute()
    gemx.getMat(dense_out)
    gemx.clearInstrBuf()

    # Crop the output buffer back to the recorded output dimensions.
    keep_rows, keep_cols = self.out_dim[0], self.out_dim[1]
    return dense_out[:keep_rows, :keep_cols]
def predict(self, inp):
    """
    prepare input matrix for the engine, send all the matrices and
    instructions to kernel and get the result prediction matrix

    When ``GEMX_uspmvStages`` is 1, one USPMV operation is queued per layer
    and each layer's output buffer becomes the next layer's input. Otherwise
    a single operation is queued with ``self.A_list[0]`` that writes into an
    output buffer sized for the last layer (``self.sizes[-1][0]``)
    (presumably the kernel then runs all stages internally; confirm against
    the kernel documentation).

    Parameters
    ----------
    inp: array
        input matrix

    Return
    ------
    array
        result prediction matrix, sliced to ``self.out_dim``
    """
    stage_size = int(self.xclbin_opts["GEMX_uspmvStages"])
    layer_size = len(self._qw)

    # Both modes start the same way: format the input and send it.
    inp = self.format_for_fpga(inp, 1, self.min_m)
    num_runs = inp.shape[0]
    C_list = [inp.astype(np.float32)]
    gemx.sendMat(C_list[0])

    if stage_size == 1:
        for i in range(layer_size):
            C_list.append(
                np.zeros((num_runs, self.sizes[i][0]), dtype=np.float32))
            gemx.sendMat(C_list[i + 1])
            gemx.addUSPMVOp(self.A_list[i], C_list[i], C_list[i + 1],
                            num_runs)
    else:
        # The output buffer must be appended to C_list so that C_list[-1]
        # refers to it; otherwise C_list[-1] is still the input matrix and
        # the input would be used as the output and returned as the result.
        C_list.append(
            np.zeros((num_runs, self.sizes[-1][0]), dtype=np.float32))
        gemx.sendMat(C_list[-1])
        gemx.addUSPMVOp(self.A_list[0], C_list[0], C_list[-1], num_runs)

    gemx.execute()
    gemx.getMat(C_list[-1])
    gemx.clearInstrBuf()
    result = C_list[-1]
    return result[:self.out_dim[0], :self.out_dim[1]]
gemx.sendMat(C_buf[i + 1]) else: gemx.sendMat(C_buf[i + 1]) gemx.sendMat(bias_buf[i]) else: gemx.sendMat(C_buf[i + 1]) time.sleep(2) total_time = 0 for k in range(args.numiter): #interations start_time = time.time() gemx.sendMat(C_buf[0]) if stage_size == 1: for i in range(num_matrix): if args.engine == 'spmv': gemx.addUSPMVOp(A_buf[i], C_buf[i], C_buf[i + 1], number_runs) else: gemx.addFCNOp(A_buf[i], C_buf[i], C_buf[i + 1], bias_buf[i], 1, 0, 0, 0) else: gemx.addUSPMVOp(A_buf[0], C_buf[0], C_buf[-1], number_runs) gemx.execute() gemx.getMat(C_buf[-1]) total_time += time.time() - start_time gemx.clearInstrBuf() exectime = total_time / args.numiter print("Average FPGA exec time(python): ", exectime * 1000, " ms") print("inf/s:", number_runs / exectime) gemx.printStats()