def _run(work):
    """Fill this worker's input blob with one batch of images and publish it.

    ``work`` is an ``(idx, images, args)`` tuple; ``args`` is not used here.
    Each image is taken from ``Dispatcher.inBlobCache`` when present,
    otherwise it is loaded with ``xdnn_io.loadYoloImageBlobFromFile`` and a
    copy of the loaded slot is stored in the cache together with its shape.

    Any exception raised along the way is logged and swallowed, so the
    function always returns ``None``.
    """
    try:
        query_id, image_paths, _unused_args = work
        channel = query_id % Dispatcher.nWorkers
        token = pid2TokenStr()
        in_shape = Dispatcher.inshape

        image_shapes = []
        for slot, path in enumerate(image_paths):
            hit = Dispatcher.inBlobCache.get(path)
            if hit is not None:
                # Cache entries are (blob, original image shape) pairs.
                Dispatcher.inBlob[token][slot, ...] = hit[0]
                image_shapes.append(hit[1])
                continue

            loaded, loaded_shape = xdnn_io.loadYoloImageBlobFromFile(
                path, in_shape[2], in_shape[3])
            Dispatcher.inBlob[token][slot, ...] = loaded
            # Store a copy: the blob slot itself is overwritten by later work.
            Dispatcher.inBlobCache.set(
                path, (Dispatcher.inBlob[token][slot].copy(), loaded_shape))
            image_shapes.append(loaded_shape)

        meta = {
            'id': query_id,
            'from': token,
            'shape': in_shape,
            'images': image_paths,
            'image_shapes': image_shapes,
        }

        if query_id % 1000 == 0:
            print("Put query %d to objstore" % query_id)
            sys.stdout.flush()

        Dispatcher.xspub[token].put_blob(
            chanIdx2Str(channel), Dispatcher.inBlob[token], meta)
        # NOTE(review): presumably waits for the consumer's reply before the
        # blob is reused - confirm against the xstoken implementation.
        Dispatcher.xstoken[token].get_msg()
    except Exception as exc:
        logging.error("Producer exception " + str(exc))
def run(self, inum_chunk):
    """Pre-process one chunk of images into a shared write slot.

    Opens a write slot on ``self._shared_trans_arrs``, and (unless
    ``self._args['benchmarkmode']`` is set) loads every image index in
    ``inum_chunk`` into the first buffer while recording the image index and
    its shape in the last buffer. Rows of the last buffer beyond the chunk
    length are set to -1. The slot is closed before returning.
    """
    shared = self._shared_trans_arrs
    slot = shared.openWriteId()
    buffers = shared.accessNumpyBuffer(slot)

    benchmark_only = self._args['benchmarkmode']
    if not benchmark_only:
        image_rows = buffers[0]
        meta_rows = buffers[-1]

        for row, inum in enumerate(inum_chunk):
            image_rows[row][:], img_shape = xdnn_io.loadYoloImageBlobFromFile(
                self._imgpaths[inum],
                self._firstInputShape[2],
                self._firstInputShape[3])
            # Metadata row layout: [image index, shape[0], shape[1], shape[2]].
            meta_rows[row][0] = inum
            meta_rows[row][1:4] = img_shape

        # Mark the unused tail of the batch so consumers can skip it.
        used = len(inum_chunk)
        meta_rows[used:][:] = -1

    shared.closeWriteId(slot)
def _run(self, objId):
    """Pre-process one queued object and forward it with ``self.put``.

    The object is fetched from ``self._xsIn`` as a ``(meta, inbuf)`` pair.
    If ``meta`` carries a ``'path'`` the image is loaded from that path;
    otherwise ``inbuf`` is interpreted as a raw image described by
    ``meta['dtype']``, ``meta['image_height']``, ``meta['image_width']`` and
    ``meta['image_channels']``, and a base64 JPEG thumbnail is attached to
    ``meta`` for return to the client.

    Failures during pre-processing are printed and logged (with traceback);
    they are not re-raised and nothing is forwarded in that case.
    """
    (meta, inbuf) = self._xsIn.obj_get(objId)
    try:
        if 'path' in meta:
            img = str(meta['path'])
        else:
            img = np.frombuffer(inbuf, getattr(np, meta['dtype']))
            img = img.reshape(int(meta['image_height']),
                              int(meta['image_width']),
                              int(meta['image_channels']))
            meta['path'] = 'dummy_path'  # TODO remove the dependency on this key

            # put encoded image in meta for return to client
            thumb = xdnn_io.makeThumbnail(
                img, max(self._firstInputShape[1], self._firstInputShape[2]))
            meta['resized_shape'] = thumb.shape
            retval, img_str = cv2.imencode(".jpg", thumb)
            if retval:
                base64_str = base64.b64encode(img_str)
                meta['img'] = base64_str.decode('utf-8')

        np_arr = np.zeros(tuple(self._firstInputShape[1:]),
                          dtype=np.float32,
                          order='C')
        if not self._args['benchmarkmode']:
            np_arr[:], meta['image_shape'] = xdnn_io.loadYoloImageBlobFromFile(
                img, int(self._args['net_h']), int(self._args['net_w']))

        # np_arr has the shape (C,H,W)
        self.put(meta, np_arr)
    except Exception as e:
        print("ERROR : {}".format(str(e)))
        # Pass the values as logger arguments: the previous
        # `"... %s - %s" % (meta['id'])` supplied one value for two
        # placeholders and raised TypeError from inside this handler,
        # masking the real error. `.get` keeps the handler safe when the
        # request carries no 'id'.
        logger.exception("pre error %s - %s", meta.get('id'), e)
def main():
    """Run batched YOLO detection over the images named in the arguments.

    Parses the command line, allocates one float32 buffer per runner input
    and output tensor, then for each batch: loads the images into the first
    input buffer, executes the runner, and post-processes the outputs into
    boxes. Detections are saved when ``args['results_dir']`` is set and drawn
    when ``args['visualize']`` is set. Average per-image latencies are
    printed when ``args['profile']`` is set, and mAP is computed when
    ``args['golden']`` is set.

    Raises:
        ValueError: if ``args['yolo_version']`` is neither ``'v2'`` nor
            ``'v3'``.
    """
    parser = xdnn_io.default_parser_args()
    parser = yolo_parser_args(parser)
    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)

    # Setup the environment
    img_paths = xdnn_io.getFilePaths(args['images'])
    if (args['golden'] or args['visualize']):
        assert args['labels'], "Provide --labels to compute mAP."
        assert args[
            'results_dir'], "For accuracy measurements, provide --results_dir to save the detections."
        labels = xdnn_io.get_labels(args['labels'])
        colors = generate_colors(len(labels))

    # Fail early on an unknown version instead of hitting an unbound
    # `yolo_postproc` after the first batch has already been executed.
    if args['yolo_version'] == 'v2':
        yolo_postproc = yolo.yolov2_postproc
    elif args['yolo_version'] == 'v3':
        yolo_postproc = yolo.yolov3_postproc
    else:
        raise ValueError(
            "Unsupported yolo_version {!r}; expected 'v2' or 'v3'".format(
                args['yolo_version']))

    runner = Runner(args['vitis_rundir'])

    # Setup the blobs
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()
    batch_sz = args['batch_sz']
    if batch_sz == -1:
        # -1 means "use the batch dimension of the first input tensor".
        batch_sz = inTensors[0].dims[0]

    fpgaBlobs = []
    for tensors in [inTensors, outTensors]:
        blobs = []
        for t in tensors:
            # Replace the tensor's own leading dimension with batch_sz.
            shape = (batch_sz, ) + tuple(
                [t.dims[i] for i in range(t.ndims)][1:])
            blobs.append(np.empty((shape), dtype=np.float32, order='C'))
        fpgaBlobs.append(blobs)
    fpgaInput = fpgaBlobs[0][0]

    # Setup the YOLO config
    net_h, net_w = fpgaInput.shape[-2:]
    args['net_h'] = net_h
    args['net_w'] = net_w
    biases = bias_selector(args)

    # Setup profiling env
    prep_time = 0
    exec_time = 0
    post_time = 0

    # Start the execution
    for start in range(0, len(img_paths), batch_sz):
        batch_paths = img_paths[start:start + batch_sz]
        img_shapes = []

        # Prep images
        t1 = timeit.default_timer()
        for j, p in enumerate(batch_paths):
            fpgaInput[j, ...], img_shape = xdnn_io.loadYoloImageBlobFromFile(
                p, net_h, net_w)
            img_shapes.append(img_shape)
        t2 = timeit.default_timer()

        # Execute
        jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
        runner.wait(jid)

        # Post Proc
        t3 = timeit.default_timer()
        boxes = yolo_postproc(fpgaBlobs[1], args, img_shapes, biases=biases)
        t4 = timeit.default_timer()

        prep_time += (t2 - t1)
        exec_time += (t3 - t2)
        post_time += (t4 - t3)

        # Only the images actually loaded in this batch are reported; the
        # last batch may be shorter than batch_sz.
        for k, path in enumerate(batch_paths):
            print("Detected {} boxes in {}".format(len(boxes[k]), path))

        # Save the result
        if (args['results_dir']):
            for k, path in enumerate(batch_paths):
                filename = os.path.splitext(os.path.basename(path))[0]
                out_file_txt = os.path.join(args['results_dir'],
                                            filename + '.txt')
                print("Saving {} boxes to {}".format(len(boxes[k]),
                                                     out_file_txt))
                sys.stdout.flush()
                saveDetectionDarknetStyle(out_file_txt, boxes[k],
                                          img_shapes[k])
                if (args['visualize']):
                    out_file_png = os.path.join(args['results_dir'],
                                                filename + '.png')
                    print("Saving result to {}".format(out_file_png))
                    sys.stdout.flush()
                    draw_boxes(path, boxes[k], labels, colors, out_file_png)

    # Profiling results
    if (args['profile']):
        # Guard the averages against an empty image list (all timers are 0).
        num_images = max(len(img_paths), 1)
        print("\nAverage Latency in ms:")
        print(" Image Prep: {0:3f}".format(prep_time * 1000.0 / num_images))
        print(" Exec: {0:3f}".format(exec_time * 1000.0 / num_images))
        print(" Post Proc: {0:3f}".format(post_time * 1000.0 / num_images))
        sys.stdout.flush()

    # mAP calculation
    if (args['golden']):
        print()
        print("Computing mAP score : ")
        print("Class names are : {} ".format(labels))
        calc_detector_mAP(args['results_dir'], args['golden'], len(labels),
                          labels, args['prob_threshold'],
                          args['mapiouthresh'], args['points'])
        sys.stdout.flush()
def yolo_gpu_inference(backend_path, image_dir, deploy_model, weights,
                       out_labels, IOU_threshold, scorethresh, mean_value,
                       pxscale, transpose, channel_swap, yolo_model,
                       num_classes, args):
    """Run YOLO detection with Caffe, one image at a time.

    Only ``deploy_model``, ``weights`` and ``args`` are read by this
    function; the remaining parameters are accepted but not used here.

    The network runs on CPU unless ``args['gpu']`` is not ``None``, in which
    case that GPU device is selected. Detections are saved when
    ``args['results_dir']`` is set and drawn when ``args['visualize']`` is
    set. ``args['net_h']`` and ``args['net_w']`` are overwritten with the
    spatial size of the network's ``data`` blob.

    Returns:
        The number of images found under ``args['images']``.

    Raises:
        ValueError: if ``args['yolo_version']`` is neither ``'v2'`` nor
            ``'v3'``.
    """
    # Setup the environment
    images = xdnn_io.getFilePaths(args['images'])
    if (args['golden'] or args['visualize']):
        assert args['labels'], "Provide --labels to compute mAP."
        assert args[
            'results_dir'], "For accuracy measurements, provide --results_dir to save the detections."
        labels = xdnn_io.get_labels(args['labels'])
        colors = generate_colors(len(labels))

    # Select postproc and biases. Reject unknown versions here instead of
    # failing on an unbound `yolo_postproc` after the model has been loaded.
    if args['yolo_version'] == 'v2':
        yolo_postproc = yolo.yolov2_postproc
    elif args['yolo_version'] == 'v3':
        yolo_postproc = yolo.yolov3_postproc
    else:
        raise ValueError(
            "Unsupported yolo_version {!r}; expected 'v2' or 'v3'".format(
                args['yolo_version']))
    biases = bias_selector(args)

    # Imported here so the module can be loaded without Caffe installed.
    import caffe
    caffe.set_mode_cpu()
    print(args)
    if (args['gpu'] is not None):
        caffe.set_mode_gpu()
        caffe.set_device(args['gpu'])

    net = caffe.Net(deploy_model, weights, caffe.TEST)

    net_h, net_w = net.blobs['data'].data.shape[-2:]
    args['net_h'] = net_h
    args['net_w'] = net_w

    for i, img in enumerate(images):
        if ((i + 1) % 100 == 0):
            print(i + 1, "images processed")

        raw_img, img_shape = xdnn_io.loadYoloImageBlobFromFile(
            img, net_h, net_w)
        net.blobs['data'].data[...] = raw_img
        out = net.forward()

        # Order the output blobs by their last dimension before post-processing.
        caffeOutput = sorted(out.values(), key=lambda item: item.shape[-1])
        boxes = yolo_postproc(caffeOutput, args, [img_shape], biases=biases)

        print("{}. Detected {} boxes in {}".format(i, len(boxes[0]), img))

        # Save the result
        boxes = boxes[0]  # single-image batch
        if (args['results_dir']):
            filename = os.path.splitext(os.path.basename(img))[0]
            out_file_txt = os.path.join(args['results_dir'],
                                        filename + '.txt')
            print("Saving {} boxes to {}".format(len(boxes), out_file_txt))
            sys.stdout.flush()
            saveDetectionDarknetStyle(out_file_txt, boxes, img_shape)
            if (args['visualize']):
                out_file_png = os.path.join(args['results_dir'],
                                            filename + '.png')
                print("Saving result to {}".format(out_file_png))
                sys.stdout.flush()
                draw_boxes(img, boxes, labels, colors, out_file_png)

    return len(images)