def run(self, imgList, fpgaOutput_list, fpgaOutputShape_list, shapeArr):
    """Post-process one batch of network outputs and report the detections.

    Args:
        imgList: Image paths of the batch; used for log messages, for
            deriving output file names and as input to ``draw_boxes``.
        fpgaOutput_list: Raw network outputs, forwarded unchanged to
            ``self.yolo_postproc``.
        fpgaOutputShape_list: Not used by this method; kept for interface
            compatibility.
        shapeArr: Per-image shapes, forwarded to ``self.yolo_postproc`` and
            to ``saveDetectionDarknetStyle``.

    Returns:
        None. Results are printed and, when ``self.args['results_dir']`` is
        set, written to ``<results_dir>/<image basename>.txt`` (plus a
        ``.png`` when ``self.args['visualize']`` is set).
    """
    if self.numProcessed == 0:
        # One-time setup performed lazily on the first batch.
        self.zmqPub = None
        if self.args['zmqpub']:
            self.zmqPub = mp_classify.ZmqResultPublisher(self.args['deviceID'])
        self.goldenMap = None

    self.numProcessed += len(imgList)

    # Pass the instance configuration. The previous code referenced a bare
    # ``args`` name here while every other access in this method goes through
    # ``self.args``.
    bboxlist_for_images = self.yolo_postproc(
        fpgaOutput_list, self.args, shapeArr, biases=self.biases)

    if not self.args['profile']:
        # Only the first min(batch_sz, len(shapeArr)) entries are reported.
        for i in range(min(self.args['batch_sz'], len(shapeArr))):
            print("Detected {} boxes in {}".format(
                len(bboxlist_for_images[i]), imgList[i]))

    if self.args['results_dir']:
        boxes = bboxlist_for_images
        for i in range(min(self.args['batch_sz'], len(shapeArr))):
            filename = os.path.splitext(os.path.basename(imgList[i]))[0]
            out_file_txt = os.path.join(self.args['results_dir'],
                                        filename + '.txt')
            print("Saving {} boxes to {}".format(len(boxes[i]), out_file_txt))
            sys.stdout.flush()
            saveDetectionDarknetStyle(out_file_txt, boxes[i], shapeArr[i])

            if self.args['visualize']:
                out_file_png = os.path.join(self.args['results_dir'],
                                            filename + '.png')
                print("Saving result to {}".format(out_file_png))
                sys.stdout.flush()
                draw_boxes(imgList[i], boxes[i], self.labels, self.colors,
                           out_file_png)
def run(rundir, chanIdx, q, args):
    """Worker loop: receive image batches, run inference and YOLO post-processing.

    Payloads are read from the xstream channel selected by ``chanIdx`` until
    an empty payload arrives. For each payload the input buffer is copied
    into the runner's input blob, the runner is executed, detections are
    produced with the selected YOLO post-processor, optionally printed and
    saved, and ``"success"`` is sent back to ``meta['from']``.

    Args:
        rundir: Run directory handed to ``Runner``.
        chanIdx: Channel index, converted with ``chanIdx2Str`` to select the
            subscription channel.
        q: Queue on which ``1`` is put once the runner has been created.
        args: Option dict. Keys read here: ``labels``, ``yolo_version``,
            ``visualize``, ``profile``, ``results_dir``.

    Raises:
        ValueError: If ``args['yolo_version']`` is neither ``'v2'`` nor
            ``'v3'``.
    """
    xspub = xstream.Publisher()
    xssub = xstream.Subscribe(chanIdx2Str(chanIdx))
    runner = Runner(rundir)
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()

    q.put(1)  # ready for work

    fpgaBlobs = None
    labels = xdnn_io.get_labels(args['labels'])

    if args['yolo_version'] == 'v2':
        yolo_postproc = yolo.yolov2_postproc
    elif args['yolo_version'] == 'v3':
        yolo_postproc = yolo.yolov3_postproc
    else:
        # Explicit raise instead of ``assert`` so the check survives ``-O``
        # and ``yolo_postproc`` can never be left unbound.
        raise ValueError("--yolo_version should be <v2|v3>")

    biases = bias_selector(args)
    if args['visualize']:
        # ``colors`` is only needed (and only defined) when visualizing.
        colors = generate_colors(len(labels))

    while True:
        try:
            payload = xssub.get()
            if not payload:
                break
            meta, buf = payload

            if fpgaBlobs is None:
                # Allocate the I/O buffers once, sized by the batch
                # dimension of the first payload.
                fpgaBlobs = []
                batchsz = meta['shape'][0]  # inTensors[0].dims[0]

                for io in (inTensors, outTensors):
                    blobs = []
                    for t in io:
                        shape = (batchsz, ) + tuple(
                            t.dims[i] for i in range(1, t.ndims))
                        blobs.append(
                            np.empty(shape, dtype=np.float32, order='C'))
                    fpgaBlobs.append(blobs)

            fpgaInput = fpgaBlobs[0][0]
            assert tuple(meta['shape']) == fpgaInput.shape
            data = np.frombuffer(buf,
                                 dtype=np.float32).reshape(fpgaInput.shape)
            np.copyto(fpgaInput, data)

            jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
            runner.wait(jid)

            boxes = yolo_postproc(fpgaBlobs[1], args, meta['image_shapes'],
                                  biases=biases)

            # Only report as many entries as there are image shapes.
            num_valid = min(batchsz, len(meta['image_shapes']))

            if not args['profile']:
                for i in range(num_valid):
                    print("Detected {} boxes in {}".format(
                        len(boxes[i]), meta['images'][i]), flush=True)

            # Save the result
            if args['results_dir']:
                for i in range(num_valid):
                    fname = meta['images'][i]
                    filename = os.path.splitext(os.path.basename(fname))[0]
                    out_file_txt = os.path.join(args['results_dir'],
                                                filename + '.txt')
                    print("Saving {} boxes to {}".format(
                        len(boxes[i]), out_file_txt))
                    sys.stdout.flush()
                    saveDetectionDarknetStyle(out_file_txt, boxes[i],
                                              meta['image_shapes'][i])

                    if args['visualize']:
                        out_file_png = os.path.join(args['results_dir'],
                                                    filename + '.png')
                        print("Saving result to {}".format(out_file_png))
                        sys.stdout.flush()
                        draw_boxes(fname, boxes[i], labels, colors,
                                   out_file_png)

            if meta['id'] % 1000 == 0:
                print("Recvd query %d" % meta['id'])
                sys.stdout.flush()

            # Drop references to the received buffers before the next get().
            del data
            del buf
            del payload

            xspub.send(meta['from'], "success")

        except Exception as e:
            # Keep the worker alive, but log the traceback as well as the
            # message. NOTE(review): no reply is sent to the requester on
            # failure - confirm the client side tolerates that.
            logging.exception("Worker exception %s", e)
def main():
    """Run YOLO detection over a set of images in fixed-size batches.

    Parses the command line, builds a ``Runner`` from
    ``args['vitis_rundir']``, allocates float32 input/output blobs, then for
    each batch loads the images, executes the runner and post-processes the
    outputs. Detections are printed, optionally saved to
    ``args['results_dir']`` (``.txt`` and, with ``visualize``, ``.png``).
    With ``profile`` the average per-image latencies are printed, and with
    ``golden`` the mAP computation is invoked.

    Raises:
        ValueError: If ``args['yolo_version']`` is neither ``'v2'`` nor
            ``'v3'``.
    """
    parser = xdnn_io.default_parser_args()
    parser = yolo_parser_args(parser)
    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)

    # Setup the environment
    img_paths = xdnn_io.getFilePaths(args['images'])
    if args['golden'] or args['visualize']:
        assert args['labels'], "Provide --labels to compute mAP."
        assert args['results_dir'], \
            "For accuracy measurements, provide --results_dir to save the detections."
        labels = xdnn_io.get_labels(args['labels'])
        colors = generate_colors(len(labels))

    if args['yolo_version'] == 'v2':
        yolo_postproc = yolo.yolov2_postproc
    elif args['yolo_version'] == 'v3':
        yolo_postproc = yolo.yolov3_postproc
    else:
        # Fail early with a clear message instead of hitting an unbound
        # ``yolo_postproc`` inside the batch loop.
        raise ValueError("--yolo_version should be <v2|v3>")

    runner = Runner(args['vitis_rundir'])

    # Setup the blobs
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()
    batch_sz = args['batch_sz']
    if batch_sz == -1:
        # -1 means: take the batch size from the first input tensor.
        batch_sz = inTensors[0].dims[0]

    fpgaBlobs = []
    for io in (inTensors, outTensors):
        blobs = []
        for t in io:
            shape = (batch_sz, ) + tuple(t.dims[d] for d in range(1, t.ndims))
            blobs.append(np.empty(shape, dtype=np.float32, order='C'))
        fpgaBlobs.append(blobs)
    fpgaInput = fpgaBlobs[0][0]

    # Setup the YOLO config
    net_h, net_w = fpgaInput.shape[-2:]
    args['net_h'] = net_h
    args['net_w'] = net_w
    biases = bias_selector(args)

    # Setup profiling env
    prep_time = 0
    exec_time = 0
    post_time = 0

    # Start the execution
    for start in range(0, len(img_paths), batch_sz):
        pl = []
        img_shapes = []

        # Prep images
        t1 = timeit.default_timer()
        for j, p in enumerate(img_paths[start:start + batch_sz]):
            fpgaInput[j, ...], img_shape = xdnn_io.loadYoloImageBlobFromFile(
                p, net_h, net_w)
            pl.append(p)
            img_shapes.append(img_shape)
        t2 = timeit.default_timer()

        # Execute
        jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
        runner.wait(jid)

        # Post Proc
        t3 = timeit.default_timer()
        boxes = yolo_postproc(fpgaBlobs[1], args, img_shapes, biases=biases)
        t4 = timeit.default_timer()

        prep_time += (t2 - t1)
        exec_time += (t3 - t2)
        post_time += (t4 - t3)

        # A final partial batch fills only part of the input blob, so only
        # the images actually loaded in this iteration are reported.
        num_valid = min(batch_sz, len(img_shapes))

        for k in range(num_valid):
            print("Detected {} boxes in {}".format(len(boxes[k]), pl[k]))

        # Save the result
        if args['results_dir']:
            for k in range(num_valid):
                filename = os.path.splitext(os.path.basename(pl[k]))[0]
                out_file_txt = os.path.join(args['results_dir'],
                                            filename + '.txt')
                print("Saving {} boxes to {}".format(len(boxes[k]),
                                                     out_file_txt))
                sys.stdout.flush()
                saveDetectionDarknetStyle(out_file_txt, boxes[k],
                                          img_shapes[k])
                if args['visualize']:
                    out_file_png = os.path.join(args['results_dir'],
                                                filename + '.png')
                    print("Saving result to {}".format(out_file_png))
                    sys.stdout.flush()
                    draw_boxes(pl[k], boxes[k], labels, colors, out_file_png)

    # Profiling results
    if args['profile']:
        # Guard against an empty image list: the accumulated times are all
        # zero in that case, so dividing by 1 simply reports zeros.
        num_images = max(len(img_paths), 1)
        # NOTE(review): '{0:3f}' is a minimum field width of 3 with the
        # default 6 decimals; '.3f' may have been intended - confirm before
        # changing the output format.
        print("\nAverage Latency in ms:")
        print(" Image Prep: {0:3f}".format(prep_time * 1000.0 / num_images))
        print(" Exec: {0:3f}".format(exec_time * 1000.0 / num_images))
        print(" Post Proc: {0:3f}".format(post_time * 1000.0 / num_images))
        sys.stdout.flush()

    # mAP calculation
    if args['golden']:
        print()
        print("Computing mAP score : ")
        print("Class names are : {} ".format(labels))
        calc_detector_mAP(args['results_dir'], args['golden'], len(labels),
                          labels, args['prob_threshold'],
                          args['mapiouthresh'], args['points'])
        sys.stdout.flush()
def yolo_gpu_inference(backend_path, image_dir, deploy_model, weights,
                       out_labels, IOU_threshold, scorethresh, mean_value,
                       pxscale, transpose, channel_swap, yolo_model,
                       num_classes, args):
    """Run YOLO detection with Caffe on every image listed in ``args['images']``.

    Only ``deploy_model``, ``weights`` and ``args`` are read by this
    function; the remaining parameters are accepted but not referenced.

    For each image the blob is loaded at the network's input size, a forward
    pass is run, the outputs are ordered by their last dimension and handed
    to the selected YOLO post-processor. Detections are printed and, when
    ``args['results_dir']`` is set, saved as ``.txt`` (and ``.png`` with
    ``args['visualize']``). ``args['net_h']`` / ``args['net_w']`` are
    written as a side effect.

    Returns:
        The number of images found for ``args['images']``.
    """
    # Setup the environment
    images = xdnn_io.getFilePaths(args['images'])
    needs_labels = args['golden'] or args['visualize']
    if needs_labels:
        assert args['labels'], "Provide --labels to compute mAP."
        assert args['results_dir'], \
            "For accuracy measurements, provide --results_dir to save the detections."
        labels = xdnn_io.get_labels(args['labels'])
        colors = generate_colors(len(labels))

    # Select postproc and biases
    version = args['yolo_version']
    if version == 'v2':
        yolo_postproc = yolo.yolov2_postproc
    elif version == 'v3':
        yolo_postproc = yolo.yolov3_postproc
    biases = bias_selector(args)

    import caffe

    # CPU mode by default; switch to the requested GPU when one is given.
    caffe.set_mode_cpu()
    print(args)
    if args['gpu'] is not None:
        caffe.set_mode_gpu()
        caffe.set_device(args['gpu'])

    net = caffe.Net(deploy_model, weights, caffe.TEST)

    # The network input size is taken from the 'data' blob.
    net_h, net_w = net.blobs['data'].data.shape[-2:]
    args['net_h'] = net_h
    args['net_w'] = net_w

    def _last_dim(blob):
        # Sort key: size of the output's last dimension.
        return blob.shape[-1]

    for index, image_path in enumerate(images):
        done = index + 1
        if done % 100 == 0:
            print(done, "images processed")

        blob, img_shape = xdnn_io.loadYoloImageBlobFromFile(
            image_path, net_h, net_w)
        net.blobs['data'].data[...] = blob
        outputs = net.forward()
        caffeOutput = sorted(outputs.values(), key=_last_dim)

        # A single image is processed per call, so take the first entry.
        detections = yolo_postproc(caffeOutput, args, [img_shape],
                                   biases=biases)[0]
        print("{}. Detected {} boxes in {}".format(index, len(detections),
                                                   image_path))

        # Save the result
        if not args['results_dir']:
            continue

        stem = os.path.splitext(os.path.basename(image_path))[0]
        out_file_txt = os.path.join(args['results_dir'], stem + '.txt')
        print("Saving {} boxes to {}".format(len(detections), out_file_txt))
        sys.stdout.flush()
        saveDetectionDarknetStyle(out_file_txt, detections, img_shape)

        if args['visualize']:
            out_file_png = os.path.join(args['results_dir'], stem + '.png')
            print("Saving result to {}".format(out_file_png))
            sys.stdout.flush()
            draw_boxes(image_path, detections, labels, colors, out_file_png)

    return len(images)
def loop(self, payload):
    """Handle one ``(meta, buf)`` payload and forward its requests.

    ``buf`` is viewed as float32 and cut into one array per output using
    consecutive pairs of ``self.buf_indices``, each reshaped to the matching
    entry of ``self.fpgaOutputShapes``. Unless ``benchmarkmode`` is set, the
    remaining layers ('layer28-reorg' .. 'layer31-conv') are run through
    ``self.net`` per batch entry, detections are computed with ``self._run``,
    optionally saved, and attached to each request as
    ``[x1, y1, x2, y2, classid]`` lists together with the callback. Finally
    every request is passed to ``self.put``.

    NOTE(review): the nesting below was reconstructed from flattened source;
    confirm that all post-processing is meant to be skipped in benchmark mode.
    """
    # Remember when the very first payload was seen.
    if self.numProcessed == 0:
        self.startTime = timeit.default_timer()

    meta, buf = payload
    flat = np.frombuffer(buf, dtype=np.float32)

    # Slice the flat buffer into one independent (copied) array per output.
    bufs = [
        flat[self.buf_indices[k]:self.buf_indices[k + 1]].reshape(
            self.fpgaOutputShapes[k]).copy()
        for k in range(len(self.buf_indices) - 1)
    ]

    requests = meta['requests']
    imgList = [req['path'] for req in requests]
    imgShape = [req['image_shape'] for req in requests]

    if not self._args["benchmarkmode"]:
        # Feed each batch entry's blobs into the net and collect the output
        # of the final layer.
        for b in range(self._args['batch_sz']):
            for out_idx, blob_name in enumerate(meta['outputs']):
                self.net.blobs[blob_name].data[...] = bufs[out_idx][b, ...]
            self.net.forward(start='layer28-reorg', end='layer31-conv')
            self.netOut[b, ...] = self.net.blobs['layer31-conv'].data[...]

        # One list of detection dicts per image.
        image_detections = self._run(imgList, imgShape, self.netOut)

        results_dir = self._args['results_dir']
        if results_dir:
            for idx, shape in enumerate(imgShape):
                stem = os.path.splitext(os.path.basename(imgList[idx]))[0]
                out_file_txt = os.path.join(results_dir, stem + '.txt')
                saveDetectionDarknetStyle(out_file_txt,
                                          image_detections[idx], shape)
                if self._args['visualize']:
                    out_file_png = os.path.join(self._args['results_dir'],
                                                stem + '.png')
                    draw_boxes(imgList[idx], image_detections[idx],
                               self.labels, self.colors, out_file_png)

        for ri, request in enumerate(requests):
            # The y values of the two corners are deliberately swapped
            # (original comment: "ONEHACK to conform to the way facedetect
            # does corners").
            request['boxes'] = [
                [det["ll"]["x"], det["ur"]["y"],
                 det["ur"]["x"], det["ll"]["y"],
                 det["classid"]]
                for det in image_detections[ri]
            ]
            request['callback'] = self._callback

    self.numProcessed += len(meta['requests'])

    # TODO shouldn't meta always have requests?
    if 'requests' in meta:
        for request in meta['requests']:
            self.put(request)

    del buf
    del payload