def _run(work):
    """Load one query's images into the per-process input blob and publish it.

    Args:
        work: An ``(idx, images, args)`` tuple.
            ``idx`` is the query number; ``idx % Dispatcher.nWorkers`` picks
            the channel the blob is published on.
            ``images`` is the sequence of image identifiers handed to
            ``xdnn_io.loadImageBlobFromFile`` and used as cache keys.
            ``args`` supplies, via ``args[0]``..``args[2]``, the second to
            fourth positional arguments of ``xdnn_io.loadImageBlobFromFile``.

    Returns:
        None. Any exception raised while producing the query is logged
        (including its traceback) and swallowed, so the caller never sees it.
    """
    try:
        idx, images, args = work
        channel = idx % Dispatcher.nWorkers
        token = pid2TokenStr()
        shape = Dispatcher.inshape
        blob = Dispatcher.inBlob[token]

        for row, img in enumerate(images):
            cached = Dispatcher.inBlobCache.get(img)
            if cached is not None:
                # Cache hit: reuse the previously decoded image.
                blob[row, ...] = cached
                continue

            # Cache miss: decode into the blob row, then cache a private copy
            # so later writes to the blob cannot alter the cached data.
            blob[row, ...], _ = xdnn_io.loadImageBlobFromFile(
                img, args[0], args[1], args[2], shape[2], shape[3])
            Dispatcher.inBlobCache.set(img, np.copy(blob[row]))

        meta = {'id': idx, 'from': token, 'shape': shape, 'images': images}

        # Progress report on every 1000th query.
        if idx % 1000 == 0:
            print("Put query %d to objstore" % idx)
            sys.stdout.flush()

        Dispatcher.xspub[token].put_blob(chanIdx2Str(channel), blob, meta)
        # NOTE(review): presumably blocks until the consumer acknowledges the
        # blob so it is not overwritten too early -- confirm.
        Dispatcher.xstoken[token].get_msg()
    except Exception as e:
        # Same message as before, but logging.exception also records the
        # traceback so producer failures can be diagnosed.
        logging.exception("Producer exception %s", e)
def pre_process(self, image_paths, fpgaInput):
    """Load each image in ``image_paths`` into the first network input buffer.

    Args:
        image_paths: Iterable of image paths; the image at position ``k`` is
            written to row ``k`` of the buffer.
        fpgaInput: Mapping whose first value is the buffer that receives the
            image data.

    The scaling, mean and target size passed to
    ``xdnn_io.loadImageBlobFromFile`` are read from ``self.args``
    (``img_raw_scale``, ``img_mean``, ``img_input_scale``, ``inShape``).
    """
    # Assume the first network input needs to have image data loaded
    first_input = list(fpgaInput.values())[0]

    for slot, path in enumerate(image_paths):
        cfg = self.args
        blob, _ = xdnn_io.loadImageBlobFromFile(
            path,
            cfg['img_raw_scale'],
            cfg['img_mean'],
            cfg['img_input_scale'],
            cfg['inShape'][2],
            cfg['inShape'][3])
        first_input[slot, ...] = blob
def main():
    """Classify the images named on the command line, one batch at a time.

    Everything is configured by the dict returned from
    ``xdnn_io.processCommandLine()``:

    1. Create a ``Runner`` for ``args['vitis_rundir']`` and allocate one
       float32 buffer per input and output tensor, with the leading
       dimension replaced by the batch size.
    2. For each batch of image paths, load the images into the first input
       buffer, execute the runner, then apply the CPU fully-connected and
       softmax steps.
    3. With ``args['golden']`` set, accumulate top-1/top-5 hits and print the
       average accuracy at the end; otherwise print each batch's
       classification.
    """
    args = xdnn_io.processCommandLine()

    runner = Runner(args['vitis_rundir'])
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()

    batch_sz = args['batch_sz']
    if batch_sz == -1:
        # use Runner's suggested batch size
        batch_sz = inTensors[0].dims[0]

    golden = args['golden']
    top1Count = 0
    top5Count = 0
    if golden:
        goldenMap = xdnn_io.getGoldenMap(golden)

    # fpgaBlobs[0] holds the input buffers, fpgaBlobs[1] the output buffers.
    fpgaBlobs = []
    for tensors in (inTensors, outTensors):
        blobs = []
        for t in tensors:
            # Keep every dimension but the first, which becomes the batch size.
            shape = (batch_sz,) + tuple(t.dims[d] for d in range(1, t.ndims))
            blobs.append(np.empty(shape, dtype=np.float32, order='C'))
        fpgaBlobs.append(blobs)

    img_paths = xdnn_io.getFilePaths(args['images'])
    labels = xdnn_io.get_labels(args['labels'])
    xdnnCPUOp = xdnn.XDNNCPUOp("%s/weights.h5" % args['vitis_rundir'])
    fcOutput = np.empty((batch_sz, args['outsz'],), dtype=np.float32,
                        order='C')

    fpgaInput = fpgaBlobs[0][0]
    for start in range(0, len(img_paths), batch_sz):
        batch_paths = list(img_paths[start:start + batch_sz])

        # fill tensor input data from image file
        # NOTE: a short final batch only rewrites its own leading rows; the
        # remaining rows keep whatever data they held before.
        for slot, path in enumerate(batch_paths):
            img, _ = xdnn_io.loadImageBlobFromFile(
                path, args['img_raw_scale'], args['img_mean'],
                args['img_input_scale'],
                fpgaInput.shape[2], fpgaInput.shape[3])
            np.copyto(fpgaInput[slot], img)

        jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
        runner.wait(jid)

        xdnnCPUOp.computeFC(fpgaBlobs[1][0], fcOutput)
        softmaxOut = xdnnCPUOp.computeSoftmax(fcOutput)

        if golden:
            for slot, path in enumerate(batch_paths):
                top1Count += xdnn_io.isTopK(softmaxOut[slot], goldenMap, path,
                                            labels, 1)
                top5Count += xdnn_io.isTopK(softmaxOut[slot], goldenMap, path,
                                            labels, 5)
        else:
            xdnn_io.printClassification(softmaxOut, batch_paths, labels)

    if golden:
        total = len(img_paths)
        # With no images there is nothing to average; report 0% instead of
        # dividing by zero.
        if total:
            top1_pct = float(top1Count) / float(total) * 100.
            top5_pct = float(top5Count) / float(total) * 100.
        else:
            top1_pct = top5_pct = 0.
        print(("\nAverage accuracy (n=%d) Top-1: %.1f%%, Top-5: %.1f%%\n")
              % (total, top1_pct, top5_pct))
def run(self, inum_chunk):
    """Fill one shared write slot with the images selected by ``inum_chunk``.

    Args:
        inum_chunk: Sequence of indices into ``self._imgpaths``; entry ``k``
            is written to row ``k`` of the slot.

    A write slot is opened on ``self._shared_trans_arrs`` and always closed
    again before returning normally. Unless ``self._args['benchmarkmode']``
    is set, each image is loaded into the first buffer of the slot, and the
    last buffer receives per-row metadata: the image index in column 0 and
    the shape returned by the loader in columns 1-3. Metadata rows past the
    end of the chunk are set to -1.
    """
    shared = self._shared_trans_arrs
    slot_id = shared.openWriteId()
    buffers = shared.accessNumpyBuffer(slot_id)

    if not self._args['benchmarkmode']:
        pixels = buffers[0]
        meta_rows = buffers[-1]

        for row, inum in enumerate(inum_chunk):
            cfg = self._args
            in_shape = self._firstInputShape
            blob, shape = xdnn_io.loadImageBlobFromFile(
                self._imgpaths[inum],
                cfg['img_raw_scale'],
                self._meanarr,
                cfg['img_input_scale'],
                in_shape[2],
                in_shape[3])
            pixels[row][:] = blob
            meta_rows[row][0] = inum
            meta_rows[row][1:4] = shape

        # Fill -1 for unfilled image slots in whole batch
        meta_rows[len(inum_chunk):][:] = -1

    shared.closeWriteId(slot_id)