def run_inference(self, input_data):
    """Runs inference using the pycoral zero-copy feature and returns the inference time in ms."""
    start = time.monotonic()
    edgetpu.run_inference(self._interpreter, input_data)
    self._inf_time = time.monotonic() - start
    return (self._inf_time * 1000)
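# A minimal standalone sketch of the same zero-copy timing call used above, outside a
# wrapper class. The model filename is a placeholder; the pycoral calls themselves
# (make_interpreter, common.input_size, edgetpu.run_inference) are real library APIs.
import time
import numpy as np
from pycoral.adapters import common
from pycoral.utils import edgetpu
from pycoral.utils.edgetpu import make_interpreter

interpreter = make_interpreter('model_edgetpu.tflite')  # placeholder model path
interpreter.allocate_tensors()

w, h = common.input_size(interpreter)
dummy = np.zeros((h, w, 3), dtype=np.uint8)  # blank RGB frame at the model's input size

start = time.monotonic()
edgetpu.run_inference(interpreter, dummy.tobytes())  # zero-copy input, same call as above
print('Inference: {:.2f} ms'.format((time.monotonic() - start) * 1000))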
def _run_inference_with_gst(self, interpreter, input_data):
    output_index = interpreter.get_output_details()[0]['index']
    bytes_input = bytes(input_data)
    gst_input = Gst.Buffer.new_wrapped(bytes_input)
    edgetpu.run_inference(interpreter, gst_input)
    ret = interpreter.tensor(output_index)()
    return np.copy(ret)
def authorized_get(self):
    if self.path == '/':
        self.send_response(301)
        self.send_header('Location', '/index.html')
        self.end_headers()
    elif self.path == '/index.html':
        content = PAGE.encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'text/html')
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)
    elif self.path == '/stream.mjpg':
        self.send_response(200)
        self.send_header('Age', 0)
        self.send_header('Cache-Control', 'no-cache, private')
        self.send_header('Pragma', 'no-cache')
        self.send_header('Content-Type', 'multipart/x-mixed-replace; boundary=FRAME')
        self.end_headers()
        try:
            stream_video = io.BytesIO()
            fps.start()
            while True:
                # Grab an image from the camera.
                frame = camera.read()
                cv2_im = frame
                # Convert to RGB and resize to the model's input size.
                cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
                cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
                if VFLIP:
                    cv2_im_rgb = cv2.flip(cv2_im_rgb, 0)
                if HFLIP:
                    cv2_im_rgb = cv2.flip(cv2_im_rgb, 1)
                # Run object detection and draw the results on the frame.
                run_inference(interpreter, cv2_im_rgb.tobytes())
                objs = get_objects(interpreter, args.threshold)[:args.top_k]
                cv2_im = self.append_objs_to_img(cv2_im, inference_size, objs, labels)
                r, buf = cv2.imencode(".jpg", cv2_im)
                self.wfile.write(b'--FRAME\r\n')
                self.send_header('Content-type', 'image/jpeg')
                self.send_header('Content-length', str(len(buf)))
                self.end_headers()
                self.wfile.write(bytearray(buf))
                self.wfile.write(b'\r\n')
                fps.update()
        except Exception as e:
            logging.warning('Removed streaming client %s: %s',
                            self.client_address, str(e))
    else:
        self.send_error(404)
        self.end_headers()
def run_inference(self, input):
    start_time = time.monotonic()
    run_inference(self._interpreter, input)
    duration_ms = (time.monotonic() - start_time) * 1000
    output = []
    for details in self._interpreter.get_output_details():
        tensor = self._interpreter.get_tensor(details['index'])
        output.append(tensor)
    return (duration_ms, output)
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, default=0,
                        help='Index of which video source to use.')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    cap = cv2.VideoCapture(args.camera_idx)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        cv2_im = append_objs_to_img(cv2_im, inference_size, objs, labels)

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    default_labels = 'imagenet_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    pygame.init()
    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    print('By default using camera: ', camlist[-1])
    camera = pygame.camera.Camera(camlist[-1], (640, 480))
    inference_size = input_size(interpreter)
    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            imagen = camera.get_image()
            imagen = pygame.transform.scale(imagen, inference_size)
            start_ms = time.time()
            run_inference(interpreter, imagen.get_buffer().raw)
            results = get_classes(interpreter, top_k=3, score_threshold=0)
            stop_time = time.monotonic()
            inference_ms = (time.time() - start_ms) * 1000.0
            fps_ms = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(
                inference_ms, fps_ms)
            for result in results:
                annotate_text += '\n{:.0f}% {}'.format(100 * result[1], labels[result[0]])
            print(annotate_text)
    finally:
        camera.stop()
def user_callback(input_tensor, src_size, inference_box):
    nonlocal fps_counter
    start_time = time.monotonic()
    run_inference(interpreter, input_tensor)
    results = get_classes(interpreter, args.top_k, args.threshold)
    end_time = time.monotonic()
    text_lines = [
        ' ',
        'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
        'FPS: {} fps'.format(round(next(fps_counter))),
    ]
    for result in results:
        text_lines.append('score={:.2f}: {}'.format(
            result.score, labels.get(result.id, result.id)))
    print(' '.join(text_lines))
    return generate_svg(src_size, text_lines)
def capture_v(args):
    global outputFrame, lock
    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    cap = cv2.VideoCapture(args.camera_idx)
    # cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    # Sony PS3 EYE cam settings:
    # 320x240 @ 125 FPS, 640x480 @ 60 FPS, 320x240 @ 187 FPS --> use an exact FPS setting
    cap.set(cv2.CAP_PROP_FPS, 60)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print("image size=", size)

    fps = 0
    start_time = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        cv2_im = append_objs_to_img(cv2_im, inference_size, objs, labels)
        with lock:
            outputFrame = cv2_im
        fps += 1
        if fps == 200:
            end_time = time.time()
            print("cam FPS:", fps / (end_time - start_time))
            start_time = time.time()
            fps = 0
    cap.release()
def run(self):
    print('Loading {} with {} labels.'.format(self.model, self.labels))
    interpreter = make_interpreter(self.model)
    interpreter.allocate_tensors()
    readLabels = read_label_file(self.labels)
    inference_size = input_size(interpreter)

    while True:
        # Capture frame-by-frame.
        frameWebcam = self.webcam.read()
        frameWebcam = imutils.resize(frameWebcam, width=800)
        framePicam = self.picam.read()
        framePicam = imutils.resize(framePicam, width=600)

        # If the Coral accelerator is not to be used, uncomment the next line
        # and comment out the following block.
        #grayWebcam = cv2.cvtColor(frameWebcam, cv2.COLOR_BGR2GRAY)

        # Grab the frame and process it with the Coral interpreter.
        cv2_im_rgb = cv2.cvtColor(frameWebcam, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, self.threshold)[:self.top_k]
        cv2_im = self.append_objs_to_img(
            frameWebcam, inference_size, objs, readLabels)

        # Write the video to file.
        self.out.write(cv2_im)

        # Process the Picam frame.
        grayPicam = cv2.cvtColor(framePicam, cv2.COLOR_BGR2GRAY)
        grayPicam = cv2.rotate(grayPicam, cv2.ROTATE_180)
        #grayPicam = cv2.GaussianBlur(grayPicam, (21, 21), 0)

        # Display the resulting frames.
        cv2.imshow("RobotBack", grayPicam)
        cv2.imshow("RobotFront", cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture.
    self.out.release()
    cv2.destroyAllWindows()
    self.webcam.stop()
    self.picam.stop()
def detect_and_classify_faces(detector, classifier, image, threshold, padding=10):
    predictions = []
    boxes = []
    faces = []
    height, width, _ = image.shape
    detector_target_size = common.input_size(detector)
    classifier_target_size = common.input_size(classifier)

    scale_x, scale_y = (width / detector_target_size[0],
                        height / detector_target_size[1])

    resized_image = cv2.resize(image, detector_target_size)
    run_inference(detector, resized_image.tobytes())
    objects = detect.get_objects(detector, threshold)

    for object in objects:
        bbox = object.bbox.scale(scale_x, scale_y)
        startX, startY = int(bbox.xmin - padding), int(bbox.ymin - padding)
        endX, endY = int(bbox.xmax + padding), int(bbox.ymax + padding)
        # Ensure the bounding boxes fall within the dimensions of the image.
        (startX, startY) = (max(1, startX), max(1, startY))
        (endX, endY) = (min(width - 1, endX), min(height - 1, endY))
        boxes.append((startX, startY, endX, endY))

        face = image[startY:endY, startX:endX]
        face = cv2.resize(face, classifier_target_size)
        faces.append(face)

    for face in faces:
        run_inference(classifier, face.tobytes())
        prediction = classify.get_scores(classifier)
        predictions.append(prediction)

    return (boxes, predictions)
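# A hedged wiring sketch for detect_and_classify_faces() above. The model and image
# filenames are placeholders; the pycoral calls (make_interpreter, allocate_tensors)
# and cv2.imread are real APIs.
import cv2
from pycoral.utils.edgetpu import make_interpreter

face_detector = make_interpreter('face_detector_edgetpu.tflite')      # placeholder model
face_classifier = make_interpreter('face_classifier_edgetpu.tflite')  # placeholder model
face_detector.allocate_tensors()
face_classifier.allocate_tensors()

image = cv2.imread('people.jpg')  # placeholder image, BGR as loaded by OpenCV
boxes, predictions = detect_and_classify_faces(
    face_detector, face_classifier, image, threshold=0.5)
for (x0, y0, x1, y1), scores in zip(boxes, predictions):
    print('face at ({}, {}, {}, {}) -> scores {}'.format(x0, y0, x1, y1, scores))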
def _run_inference_with_different_input_types(self, interpreter, input_data):
    """Tests inference with different input types.

    It doesn't check correctness of inference. Instead it checks inference
    repeatability with different input types.

    Args:
      interpreter: A tflite interpreter.
      input_data (list): A 1-D list as the input tensor.
    """
    output_index = interpreter.get_output_details()[0]['index']
    # numpy array
    np_input = np.asarray(input_data, np.uint8)
    edgetpu.run_inference(interpreter, np_input)
    ret = interpreter.tensor(output_index)()
    ret0 = np.copy(ret)
    self.assertTrue(np.array_equal(ret0, ret))
    # bytes
    bytes_input = bytes(input_data)
    edgetpu.run_inference(interpreter, bytes_input)
    ret = interpreter.tensor(output_index)()
    self.assertTrue(np.array_equal(ret0, ret))
    # ctypes
    edgetpu.run_inference(
        interpreter, (np_input.ctypes.data_as(ctypes.c_void_p), np_input.size))
    ret = interpreter.tensor(output_index)()
    self.assertTrue(np.array_equal(ret0, ret))
    # Gst buffer
    if _libgst:
        gst_input = Gst.Buffer.new_wrapped(bytes_input)
        edgetpu.run_inference(interpreter, gst_input)
        self.assertTrue(np.array_equal(ret0, ret))
    else:
        print('Can not import gi. Skip test on Gst.Buffer input type.')
def main():
    global mot_tracker
    default_model_dir = '../models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, default=0,
                        help='Index of which video source to use.')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--tracker', help='Name of the Object Tracker To be used.',
                        default=None, choices=[None, 'sort'])
    parser.add_argument('--videosrc', help='Directly connected (dev) or Networked (net) video source.',
                        choices=['dev', 'net', 'file'], default='dev')
    parser.add_argument('--display', help='Is a display attached',
                        default='False', choices=['True', 'False'])
    parser.add_argument('--netsrc',
                        help="Networked video source, example format: rtsp://192.168.1.43/mpeg4/media.amp")
    parser.add_argument('--filesrc',
                        help="Video file source. The videos subdirectory gets mapped into the Docker container, so place your files there.")
    parser.add_argument('--modelInt8', help="Model expects input tensors to be Int8, not UInt8",
                        default='False', choices=['True', 'False'])
    args = parser.parse_args()

    trackerName = args.tracker
    # Check for the object tracker.
    if trackerName is not None:
        if trackerName == 'mediapipe':
            if detectCoralDevBoard():
                objectOfTracker = ObjectTracker('mediapipe')
            else:
                print("Tracker MediaPipe is only available on the Dev Board. "
                      "Keeping the tracker as None")
                trackerName = None
        else:
            objectOfTracker = ObjectTracker(trackerName)

    if trackerName is not None and objectOfTracker:
        mot_tracker = objectOfTracker.trackerObject.mot_tracker
    else:
        mot_tracker = None

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    model_int8 = args.modelInt8 == 'True'

    if args.videosrc == 'dev':
        cap = cv2.VideoCapture(args.camera_idx)
    elif args.videosrc == 'file':
        cap = cv2.VideoCapture(args.filesrc)
    else:
        if args.netsrc is None:
            print("--videosrc was set to net but --netsrc was not specified")
            sys.exit()
        cap = cv2.VideoCapture(args.netsrc)
        cap.set(cv2.CAP_PROP_BUFFERSIZE, 0)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            if args.videosrc == 'file':
                cap = cv2.VideoCapture(args.filesrc)
                continue
            else:
                break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        if model_int8:
            im_pil = Image.fromarray(cv2_im_rgb)
            input_type = common.input_details(interpreter, 'dtype')
            img = (input_type(cv2_im_rgb) - 127.5) / 128.0
            run_inference(interpreter, img.flatten())
        else:
            run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]

        height, width, channels = cv2_im.shape
        scale_x, scale_y = width / inference_size[0], height / inference_size[1]
        detections = []
        for obj in objs:
            bbox = obj.bbox.scale(scale_x, scale_y)
            element = [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax, obj.score, obj.id]
            detections.append(element)
        # Convert to a numpy array for the tracker.
        detections = np.array(detections)

        trdata = []
        trackerFlag = False
        if detections.any():
            if mot_tracker is not None:
                trdata = mot_tracker.update(detections)
                trackerFlag = True
        cv2_im = append_objs_to_img(cv2_im, detections, labels, trdata, trackerFlag)

        if args.display == 'True':
            cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def main():
    global mot_tracker
    global mqtt_bridge
    global mqtt_topic
    camera_width = 1280
    camera_height = 720
    default_model_dir = '../models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, default=0,
                        help='Index of which video source to use.')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--tracker', help='Name of the Object Tracker To be used.',
                        default=None, choices=[None, 'sort'])
    parser.add_argument('--videosrc', help='Directly connected (dev) or Networked (net) video source.',
                        choices=['dev', 'net', 'file'], default='dev')
    parser.add_argument('--display', help='Is a display attached',
                        default='False', choices=['True', 'False'])
    parser.add_argument('--netsrc',
                        help="Networked video source, example format: rtsp://192.168.1.43/mpeg4/media.amp")
    parser.add_argument('--filesrc',
                        help="Video file source. The videos subdirectory gets mapped into the Docker container, so place your files there.")
    parser.add_argument('--modelInt8', help="Model expects input tensors to be Int8, not UInt8",
                        default='False', choices=['True', 'False'])
    parser.add_argument('--mqtt-host', help="MQTT broker hostname", default='127.0.0.1')
    parser.add_argument('--mqtt-port', type=int, default=1883,
                        help="MQTT broker port number (default 1883)")
    parser.add_argument('--mqtt-topic', dest='mqtt_topic',
                        help="MQTT Object Tracking topic", default="skyscan/object/json")
    args = parser.parse_args()

    trackerName = args.tracker
    # Check for the object tracker.
    if trackerName is not None:
        if trackerName == 'mediapipe':
            if detectCoralDevBoard():
                objectOfTracker = ObjectTracker('mediapipe')
            else:
                print("Tracker MediaPipe is only available on the Dev Board. "
                      "Keeping the tracker as None")
                trackerName = None
        else:
            objectOfTracker = ObjectTracker(trackerName)

    if trackerName is not None and objectOfTracker:
        mot_tracker = objectOfTracker.trackerObject.mot_tracker
    else:
        mot_tracker = None

    mqtt_topic = args.mqtt_topic
    mqtt_bridge = mqtt_wrapper.bridge(host=args.mqtt_host, port=args.mqtt_port,
                                      client_id="skyscan-object-tracker-%s" % (ID))
    mqtt_bridge.publish("skyscan/registration",
                        "skyscan-adsb-mqtt-" + ID + " Registration", 0, False)

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    model_int8 = args.modelInt8 == 'True'

    if args.videosrc == 'dev':
        cap = cv2.VideoCapture(args.camera_idx)
    elif args.videosrc == 'file':
        cap = cv2.VideoCapture(args.filesrc)
    else:
        if args.netsrc is None:
            print("--videosrc was set to net but --netsrc was not specified")
            sys.exit()
        cap = cv2.VideoCapture(args.netsrc)
        cap.set(cv2.CAP_PROP_BUFFERSIZE, 0)

    timeHeartbeat = 0
    while cap.isOpened():
        if timeHeartbeat < time.mktime(time.gmtime()):
            timeHeartbeat = time.mktime(time.gmtime()) + 10
            mqtt_bridge.publish("skyscan/heartbeat",
                                "skyscan-object-tracker-" + ID + " Heartbeat", 0, False)
        start_time = time.monotonic()
        ret, frame = cap.read()
        if not ret:
            if args.videosrc == 'file':
                cap = cv2.VideoCapture(args.filesrc)
                continue
            else:
                break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        if model_int8:
            im_pil = Image.fromarray(cv2_im_rgb)
            input_type = common.input_details(interpreter, 'dtype')
            img = (input_type(cv2_im_rgb) - 127.5) / 128.0
            run_inference(interpreter, img.flatten())
        else:
            run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]

        height, width, channels = cv2_im.shape
        scale_x, scale_y = width / inference_size[0], height / inference_size[1]
        detections = []
        for obj in objs:
            bbox = obj.bbox.scale(scale_x, scale_y)
            element = [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax, obj.score, obj.id]
            detections.append(element)
        # Convert to a numpy array for the tracker.
        detections = np.array(detections)

        trdata = []
        trackerFlag = False
        if detections.any():
            if mot_tracker is not None:
                trdata = mot_tracker.update(detections)
                trackerFlag = True
        cv2_im = append_objs_to_img(cv2_im, detections, labels, trdata, trackerFlag)
        follow_x, follow_y = object_to_follow(detections, labels, trdata, trackerFlag)

        if args.display == 'True':
            cv2.imshow('frame', cv2_im)
        if follow_x is not None:
            follow_x = int(follow_x * (camera_height / height))
            follow_y = int(follow_y * (camera_width / width))
            coordinates = motionControl(follow_x, follow_y)
            follow = {"x": coordinates[0], "y": coordinates[1]}
            follow_json = json.dumps(follow)
            end_time = time.monotonic()
            print("x: {} y:{} new_x: {} new_y: {} Inference: {:.2f} ms".format(
                follow_x, follow_y, coordinates[0], coordinates[1],
                (end_time - start_time) * 1000))
            mqtt_bridge.publish(mqtt_topic, follow_json, 0, False)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def detect(self, cv2_im_rgb):
    run_inference(self.interpreter, cv2_im_rgb.tobytes())
    objs = get_objects(self.interpreter, self.threshold)[:self.max_faces]
    return objs
packed_msg_size = data[:payload_size]
data = data[payload_size:]
msg_size = struct.unpack(">L", packed_msg_size)[0]
while len(data) < msg_size:
    data += conn.recv(4096)
frame_data = data[:msg_size]
data = data[msg_size:]

frame = pickle.loads(frame_data, fix_imports=True, encoding="bytes")
frame = cv2.imdecode(frame, cv2.IMREAD_COLOR)
height, width, channels = frame.shape
frame = cv2.resize(frame, inference_size)
run_inference(interpreter, frame.tobytes())
objs = get_objects(interpreter, args.threshold)[:args.top_k]
scale_x, scale_y = width / inference_size[0], height / inference_size[1]

ret_array = []
for obj in objs:
    ret = {}
    bbox = obj.bbox.scale(scale_x, scale_y)
    x0, y0 = int(bbox.xmin), int(bbox.ymin)
    x1, y1 = int(bbox.xmax), int(bbox.ymax)
    percent = int(100 * obj.score)
    ret["label"] = labels.get(obj.id, obj.id)
    ret["percent"] = percent
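# A hedged sketch of the sending side that the receiver fragment above expects: each
# frame is JPEG-encoded, pickled, and prefixed with a 4-byte big-endian length (">L").
# The host, port, and camera index are placeholders; struct/pickle/socket/cv2 calls
# are standard-library and OpenCV APIs.
import pickle
import socket
import struct

import cv2

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(('192.168.1.10', 8485))  # placeholder receiver address

cap = cv2.VideoCapture(0)  # placeholder camera index
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Encode the frame as JPEG so the receiver can cv2.imdecode() it.
    ok, encoded = cv2.imencode('.jpg', frame)
    if not ok:
        continue
    payload = pickle.dumps(encoded, protocol=2)
    # 4-byte big-endian length prefix, matching struct.unpack(">L", ...) on the receiver.
    sock.sendall(struct.pack(">L", len(payload)) + payload)
cap.release()
sock.close()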
def main():
    cam_w, cam_h = 640, 480
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=5,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='classifier score threshold')
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)

    pygame.init()
    pygame.font.init()
    font = pygame.font.SysFont('Arial', 20)
    pygame.camera.init()
    camlist = pygame.camera.list_cameras()
    inference_size = input_size(interpreter)

    camera = None
    for cam in camlist:
        try:
            camera = pygame.camera.Camera(cam, (cam_w, cam_h))
            camera.start()
            print(str(cam) + ' opened')
            break
        except SystemError as e:
            print('Failed to open {}: {}'.format(str(cam), str(e)))
            camera = None
    if not camera:
        sys.stderr.write("\nERROR: Unable to open a camera.\n")
        sys.exit(1)

    try:
        display = pygame.display.set_mode((cam_w, cam_h), 0)
    except pygame.error as e:
        sys.stderr.write(
            "\nERROR: Unable to open a display window. Make sure a monitor is attached and that "
            "the DISPLAY environment variable is set. Example: \n"
            ">export DISPLAY=\":0\" \n")
        raise e

    red = pygame.Color(255, 0, 0)
    scale_x, scale_y = cam_w / inference_size[0], cam_h / inference_size[1]
    try:
        last_time = time.monotonic()
        while True:
            mysurface = camera.get_image()
            imagen = pygame.transform.scale(mysurface, inference_size)
            start_time = time.monotonic()
            run_inference(interpreter, imagen.get_buffer().raw)
            results = get_objects(interpreter, args.threshold)[:args.top_k]
            stop_time = time.monotonic()
            inference_ms = (stop_time - start_time) * 1000.0
            fps_ms = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(
                inference_ms, fps_ms)
            for result in results:
                bbox = result.bbox.scale(scale_x, scale_y)
                rect = pygame.Rect(bbox.xmin, bbox.ymin, bbox.width, bbox.height)
                pygame.draw.rect(mysurface, red, rect, 1)
                label = '{:.0f}% {}'.format(100 * result.score,
                                            labels.get(result.id, result.id))
                text = font.render(label, True, red)
                print(label, ' ', end='')
                mysurface.blit(text, (bbox.xmin, bbox.ymin))
            text = font.render(annotate_text, True, red)
            print(annotate_text)
            mysurface.blit(text, (0, 0))
            display.blit(mysurface, (0, 0))
            pygame.display.flip()
    finally:
        camera.stop()