def run(rundir, chanIdx, q, args):
    """Worker loop: classify image batches received over an xstream channel.

    Subscribes to the channel named by ``chanIdx2Str(chanIdx)``, and for every
    payload received runs the input through the ``Runner`` built from
    ``rundir``, applies the CPU fully-connected and softmax layers, prints the
    classification, and publishes ``"success"`` back to ``meta['from']``.

    Args:
        rundir: Directory handed to ``Runner``; ``weights.h5`` inside it is
            loaded for the CPU layers.
        chanIdx: Channel index, converted to a channel name with
            ``chanIdx2Str``.
        q: Object with a ``put`` method; ``1`` is put on it once the runner
            is set up, to signal that this worker is ready.
        args: Mapping providing ``'labels'`` and ``'outsz'``.

    Each payload is a ``(meta, buf)`` pair. ``meta`` is read for the keys
    ``'shape'``, ``'images'``, ``'id'`` and ``'from'``; ``buf`` is interpreted
    as float32 data of shape ``meta['shape']``. A falsy payload ends the loop.
    Any exception raised while handling a payload is logged and the loop
    continues with the next payload.
    """
    xspub = xstream.Publisher()
    xssub = xstream.Subscribe(chanIdx2Str(chanIdx))
    runner = Runner(rundir)
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()

    q.put(1)  # ready for work

    fpgaBlobs = None
    fcOutput = None
    labels = xdnn_io.get_labels(args['labels'])
    xdnnCPUOp = xdnn.XDNNCPUOp("%s/weights.h5" % rundir)

    while True:
        try:
            payload = xssub.get()
            if not payload:
                break
            meta, buf = payload

            if fpgaBlobs is None:
                # Allocate buffers once, sized by the batch dimension of the
                # first request rather than by inTensors[0].dims[0].
                batchsz = meta['shape'][0]
                fpgaBlobs = []
                for io in (inTensors, outTensors):
                    blobs = []
                    for t in io:
                        # Keep every tensor dimension except the leading one,
                        # which is replaced by the request's batch size.
                        shape = (batchsz,) + tuple(
                            t.dims[i] for i in range(1, t.ndims))
                        blobs.append(
                            np.empty(shape, dtype=np.float32, order='C'))
                    fpgaBlobs.append(blobs)

                fcOutput = np.empty((batchsz, args['outsz']),
                                    dtype=np.float32, order='C')

            fpgaInput = fpgaBlobs[0][0]
            # Explicit check instead of `assert`, so it still runs under -O
            # and produces a meaningful log message on mismatch.
            if tuple(meta['shape']) != fpgaInput.shape:
                raise ValueError(
                    "Input shape %s does not match buffer shape %s"
                    % (tuple(meta['shape']), fpgaInput.shape))
            data = np.frombuffer(buf, dtype=np.float32).reshape(
                fpgaInput.shape)
            np.copyto(fpgaInput, data)

            jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
            runner.wait(jid)

            xdnnCPUOp.computeFC(fpgaBlobs[1][0], fcOutput)
            softmaxOut = xdnnCPUOp.computeSoftmax(fcOutput)
            xdnn_io.printClassification(softmaxOut, meta['images'], labels)
            sys.stdout.flush()

            if meta['id'] % 1000 == 0:
                print("Recvd query %d" % meta['id'])
                sys.stdout.flush()

            # Drop our references to the received buffer before replying.
            del data
            del buf
            del payload

            xspub.send(meta['from'], "success")

        except Exception as e:
            # Keep the worker alive on per-request failures, but record the
            # traceback so the failure can be diagnosed.
            logging.exception("Worker exception %s", e)
def __init__(self, args, img_paths, fpgaOutputs, output_shapes,
             shared_output_arrs):
    """Store the run configuration and set up the CPU operator.

    Args:
        args: Mapping of run options; ``args['weights']`` is passed to
            ``xdnn.XDNNCPUOp``.
        img_paths: Stored as ``self.img_paths``.
        fpgaOutputs: Stored as ``self.fpgaOutputs``.
        output_shapes: Stored as ``self.output_shapes``.
        shared_output_arrs: Stored as ``self._shared_output_arrs``.
    """
    # Inputs handed in by the caller.
    self.args = args
    self.img_paths = img_paths
    self.fpgaOutputs = fpgaOutputs
    self.output_shapes = output_shapes
    self._shared_output_arrs = shared_output_arrs

    # Bookkeeping: processed-item counter and the construction timestamp,
    # taken before the CPU operator is built.
    self.numProcessed = 0
    self.startTime = timeit.default_timer()

    self.cpuOp = xdnn.XDNNCPUOp(args['weights'])
def main():
    """Classify the images named on the command line, batch by batch.

    Options come from ``xdnn_io.processCommandLine()``. The keys read here
    are ``'vitis_rundir'``, ``'batch_sz'``, ``'golden'``, ``'images'``,
    ``'labels'``, ``'outsz'``, ``'img_raw_scale'``, ``'img_mean'`` and
    ``'img_input_scale'``.

    Each batch is loaded into the runner's first input buffer, executed on
    the ``Runner``, then passed through the CPU fully-connected and softmax
    layers. When ``args['golden']`` is set, Top-1/Top-5 hits are accumulated
    against the golden map and the average accuracy is printed at the end;
    otherwise the classification of each batch is printed.
    """
    args = xdnn_io.processCommandLine()

    runner = Runner(args['vitis_rundir'])
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()

    batch_sz = args['batch_sz']
    if batch_sz == -1:
        # use Runner's suggested batch size
        batch_sz = inTensors[0].dims[0]

    # Counters are always defined; they are only updated in golden mode.
    goldenMap = None
    top1Count = 0
    top5Count = 0
    if args['golden']:
        goldenMap = xdnn_io.getGoldenMap(args['golden'])

    # fpgaBlobs[0] holds the input buffers, fpgaBlobs[1] the output buffers.
    fpgaBlobs = []
    for io in (inTensors, outTensors):
        blobs = []
        for t in io:
            # Keep every tensor dimension except the leading one, which is
            # replaced by the chosen batch size.
            shape = (batch_sz,) + tuple(t.dims[i] for i in range(1, t.ndims))
            blobs.append(np.empty(shape, dtype=np.float32, order='C'))
        fpgaBlobs.append(blobs)

    img_paths = xdnn_io.getFilePaths(args['images'])
    labels = xdnn_io.get_labels(args['labels'])
    xdnnCPUOp = xdnn.XDNNCPUOp("%s/weights.h5" % args['vitis_rundir'])
    fcOutput = np.empty((batch_sz, args['outsz']), dtype=np.float32,
                        order='C')

    fpgaInput = fpgaBlobs[0][0]
    for i in range(0, len(img_paths), batch_sz):
        # Paths for this batch; the final batch may hold fewer than batch_sz.
        pl = list(img_paths[i:i + batch_sz])

        # fill tensor input data from image file
        for j, p in enumerate(pl):
            img, _ = xdnn_io.loadImageBlobFromFile(
                p, args['img_raw_scale'], args['img_mean'],
                args['img_input_scale'],
                fpgaInput.shape[2], fpgaInput.shape[3])
            np.copyto(fpgaInput[j], img)

        jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
        runner.wait(jid)

        xdnnCPUOp.computeFC(fpgaBlobs[1][0], fcOutput)
        softmaxOut = xdnnCPUOp.computeSoftmax(fcOutput)

        if args['golden']:
            for j, p in enumerate(pl):
                top1Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                            labels, 1)
                top5Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                            labels, 5)
        else:
            xdnn_io.printClassification(softmaxOut, pl, labels)

    if args['golden']:
        numImages = len(img_paths)
        # Guard against an empty image list, which would otherwise raise
        # ZeroDivisionError; report 0% accuracy in that case.
        if numImages:
            top1Pct = float(top1Count) / float(numImages) * 100.
            top5Pct = float(top5Count) / float(numImages) * 100.
        else:
            top1Pct = 0.
            top5Pct = 0.
        print(("\nAverage accuracy (n=%d) Top-1: %.1f%%, Top-5: %.1f%%\n")
              % (numImages, top1Pct, top5Pct))
def post_process(self, image_paths, fpgaOutput):
    """Run the CPU fully-connected and softmax layers on one FPGA output.

    On first use (module-level ``cCpuRT`` is ``None``) this creates the
    shared CPU operator from ``self.args['weights']``, allocates the shared
    ``fcOutput`` buffer of shape ``(batch_sz, outsz)`` and loads ``labels``.

    Args:
        image_paths: Passed through to ``xdnn_io.printClassification``.
        fpgaOutput: Mapping of network outputs; only its first value is used.
    """
    global labels, fcOutput, cCpuRT

    if cCpuRT is None:
        # Lazy one-time setup of the module-level CPU runtime state.
        cCpuRT = xdnn.XDNNCPUOp(self.args['weights'])
        fc_shape = (self.args['batch_sz'], self.args['outsz'])
        fcOutput = np.empty(fc_shape, dtype=np.float32, order='C')
        labels = xdnn_io.get_labels(self.args['labels'])

    # Assume the first network output will feed CPU layers
    first_output = list(fpgaOutput.values())[0]
    cCpuRT.computeFC(first_output, fcOutput)
    probabilities = cCpuRT.computeSoftmax(fcOutput)

    # NOTE(review): the classification is printed only when args['golden']
    # is truthy - confirm this condition is intended.
    if self.args['golden']:
        xdnn_io.printClassification(probabilities, image_paths, labels)