def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument(
        "--threshold", help="Threshold to filter results.", default=0.5, type=float
    )
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--thread", help="Num threads.", default=2, type=int)
    parser.add_argument("--videopath", help="File path of video file.", default="")
    parser.add_argument("--output", help="File path of result.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize TF-Lite interpreter.
    interpreter = make_interpreter(args.model, args.thread)
    interpreter.allocate_tensors()
    _, height, width, channel = interpreter.get_input_details()[0]["shape"]
    print("Interpreter(height, width, channel): ", height, width, channel)

    # Read label and generate random colors.
    labels = read_label_file(args.label) if args.label else None
    last_key = sorted(labels.keys())[-1]
    random.seed(42)
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Input(height, width, fps): ", h, w, fps)

    model_name = os.path.splitext(os.path.basename(args.model))[0]

    # Output video file.
    # Define the codec and create a VideoWriter object.
    video_writer = None
    if args.output != "":
        fourcc = cv2.VideoWriter_fourcc(*"MP4V")
        video_writer = cv2.VideoWriter(args.output, fourcc, fps, (w, h))

    elapsed_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("VideoCapture read return false.")
            break

        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resize_im = cv2.resize(im, (width, height))

        # Run inference.
        start = time.perf_counter()
        set_input_tensor(interpreter, resize_im)
        interpreter.invoke()
        objs = get_output(interpreter, args.threshold)
        inference_time = (time.perf_counter() - start) * 1000

        # Display result.
        for obj in objs:
            class_id = int(obj["class_id"])
            caption = "{0}({1:.2f})".format(labels[class_id], obj["score"])

            # Convert the bounding box figures from relative coordinates
            # to absolute coordinates based on the original resolution.
            ymin, xmin, ymax, xmax = obj["bounding_box"]
            xmin = int(xmin * w)
            xmax = int(xmax * w)
            ymin = int(ymin * h)
            ymax = int(ymax * h)

            # Draw a rectangle and caption.
            visual.draw_rectangle(frame, (xmin, ymin, xmax, ymax), colors[class_id])
            visual.draw_caption(frame, (xmin, ymin, xmax, ymax), caption)

        # Calc fps.
        elapsed_list.append(inference_time)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "Inference: {0:.2f}ms".format(inference_time)
        display_text = model_name + " " + fps_text + avg_text
        visual.draw_caption(frame, (10, 30), display_text)

        # Output video file.
        if video_writer is not None:
            video_writer.write(frame)

        # Display.
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the window.
    cap.release()
    if video_writer is not None:
        video_writer.release()
    cv2.destroyAllWindows()
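# The detection script above assumes helper functions such as
# set_input_tensor() and get_output() that are defined elsewhere in the
# repository. A minimal sketch for a standard SSD-style TFLite detection
# model (four outputs: boxes, class ids, scores, count) is shown below;
# output ordering varies between models, so treat this as an assumption,
# not the actual implementation. (The segmentation script later in this
# section passes an already-batched array, so its variant of this helper
# presumably skips the expand_dims step.)
def set_input_tensor(interpreter, image):
    # Copy the resized image into the interpreter's input tensor,
    # adding the batch dimension the model expects.
    input_index = interpreter.get_input_details()[0]["index"]
    interpreter.set_tensor(input_index, np.expand_dims(image, axis=0))


def get_output(interpreter, score_threshold):
    # Gather detections whose score exceeds the threshold.
    output_details = interpreter.get_output_details()
    boxes = interpreter.get_tensor(output_details[0]["index"])[0]
    class_ids = interpreter.get_tensor(output_details[1]["index"])[0]
    scores = interpreter.get_tensor(output_details[2]["index"])[0]
    count = int(interpreter.get_tensor(output_details[3]["index"])[0])

    results = []
    for i in range(count):
        if scores[i] >= score_threshold:
            results.append(
                {
                    "bounding_box": boxes[i],
                    "class_id": class_ids[i],
                    "score": scores[i],
                }
            )
    return results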
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument(
        "--threshold", help="Threshold to filter results.", default=0.5, type=float
    )
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize interpreter and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[-1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size),
                )
                interpreter.invoke()
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold, scale)
                for obj in objects:
                    label_name = labels.get(obj.id, "Unknown") if labels else "Unknown"
                    caption = "{0}({1:.2f})".format(label_name, obj.score)

                    # Draw a rectangle and caption.
                    box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax)
                    visual.draw_rectangle(im, box, colors[obj.id])
                    visual.draw_caption(im, box, caption)

                # Calc fps.
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

                # Display fps.
                fps_text = "{0:.2f}ms".format(elapsed_ms)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display.
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument(
        "--threshold", help="Threshold to filter results.", default=0.5, type=float
    )
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--thread", help="Num threads.", default=2, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize TF-Lite interpreter.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    interpreter.set_num_threads(args.thread)
    _, height, width, channel = interpreter.get_input_details()[0]["shape"]
    print("Interpreter: ", height, width, channel)

    model_file, *device = args.model.split("@")
    model_name = os.path.splitext(os.path.basename(model_file))[0]

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        # _, width, height, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                rawCapture.truncate(0)

                start = time.perf_counter()

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                resize_im = cv2.resize(im, (width, height))
                input_im = resize_im.astype(np.float32)
                input_im = input_im / 255

                # Run inference.
                set_input_tensor(interpreter, input_im[np.newaxis, :, :])
                interpreter.invoke()
                predictions = get_output_tensor(interpreter, 0)
                pred_mask = create_mask(predictions)
                pred_mask = np.array(pred_mask, dtype="uint8")
                pred_mask = pred_mask * 127
                pred_mask = cv2.resize(
                    pred_mask, (resolution_width, resolution_height)
                )
                inference_time = (time.perf_counter() - start) * 1000

                # Calc fps.
                elapsed_list.append(inference_time)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

                # Display fps.
                fps_text = "Inference: {0:.2f}ms".format(inference_time)
                display_text = model_name + " " + fps_text + avg_text
                visual.draw_caption(im, (10, 30), display_text)

                # Display the frame and the predicted mask side by side.
                pred_mask = cv2.cvtColor(pred_mask, cv2.COLOR_GRAY2BGR)
                display = cv2.hconcat([im, pred_mask])
                cv2.imshow(WINDOW_NAME, display)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
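# The segmentation script above relies on get_output_tensor() and
# create_mask(), which are not shown here. A minimal sketch of plausible
# implementations is given below, assuming the model emits a per-class
# score map of shape (H, W, num_classes); this is an illustration under
# that assumption, not the repository's actual code.
def get_output_tensor(interpreter, index):
    # Return the output tensor at the given index, batch axis removed.
    output_details = interpreter.get_output_details()[index]
    return np.squeeze(interpreter.get_tensor(output_details["index"]))


def create_mask(predictions):
    # Collapse per-class scores (H, W, num_classes) into a label map (H, W)
    # by taking the highest-scoring class at each pixel.
    return np.argmax(predictions, axis=-1)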
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--top_k", help="Keep top k candidates.", default=3, type=int)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    with open(args.label, "r") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = edgetpu.classification.engine.ClassificationEngine(args.model)

    width = args.width
    height = args.height

    elapsed_list = []
    with picamera.PiCamera() as camera:
        camera.resolution = (width, height)
        camera.framerate = 30
        # _, width, height, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                start = time.time()
                results = engine.ClassifyWithImage(input_buf, top_k=args.top_k)
                elapsed = time.time() - start

                # Check result.
                if results:
                    for i in range(len(results)):
                        label = "{0} ({1:.2f})".format(
                            labels[results[i][0]], results[i][1]
                        )
                        pos = 60 + (i * 30)
                        visual.draw_caption(im, (10, pos), label)

                # Calc fps.
                fps = 1 / elapsed
                elapsed_list.append(elapsed)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed = np.mean(elapsed_list)
                    avg_fps = 1 / avg_elapsed
                    avg_text = " AVG: {0:.2f}ms, {1:.2f}fps".format(
                        (avg_elapsed * 1000.0), avg_fps
                    )

                # Display fps.
                fps_text = "{0:.2f}ms, {1:.2f}fps".format((elapsed * 1000.0), fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display.
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
print("processing time: ", time.time() - start)

# Correct for image scale.
boxes /= scale

# Visualize detections.
for box, score, label in zip(boxes[0], scores[0], labels[0]):
    # Scores are sorted, so we can break at the first one below threshold.
    if score < 0.5:
        break

    color = label_color(label)

    b = box.astype(int)
    draw_box(draw, b, color=color)

    caption = "{} {:.3f}".format(labels_to_names[label], score)
    draw_caption(draw, b, caption)

cv2.namedWindow("image", cv2.WINDOW_NORMAL)
cv2.imshow("image", draw)
key = cv2.waitKey(0)
# Press "y" to save the visualized result.
if int(key) == ord("y"):
    image_fname = osp.split(image_path)[-1]
    cv2.imwrite("test/{}".format(image_fname), draw)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--top_k", help="Keep top k candidates.", default=3, type=int)
    parser.add_argument("--threshold", help="Score threshold.", default=0.0, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--videopath", help="File path of video file.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize interpreter and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print(args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    elapsed_list = []

    while cap.isOpened():
        _, frame = cap.read()
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run inference.
        start = time.perf_counter()
        _, scale = common.set_resized_input(
            interpreter, (cap_width, cap_height), lambda size: cv2.resize(im, size)
        )
        interpreter.invoke()
        results = classify.get_classes(interpreter, args.top_k, args.threshold)
        elapsed_ms = (time.perf_counter() - start) * 1000

        # Check result.
        if results:
            for i in range(len(results)):
                label = "{0} ({1:.2f})".format(labels[results[i][0]], results[i][1])
                pos = 60 + (i * 30)
                visual.draw_caption(im, (10, pos), label)

        # Calc fps.
        fps = 1000.0 / elapsed_ms
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_fps = 1000.0 / avg_elapsed_ms
            avg_text = " AVG: {0:.2f}ms, {1:.2f}fps".format(avg_elapsed_ms, avg_fps)

        # Display fps.
        fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
        visual.draw_caption(im, (10, 30), fps_text + avg_text)

        # Display.
        cv2.imshow(WINDOW_NAME, im)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the window.
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--top_k", help="Keep top k candidates.", default=3, type=int)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--videopath", help="File path of video file.", default="")
    args = parser.parse_args()

    with open(args.label, "r") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = edgetpu.classification.engine.ClassificationEngine(args.model)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print(args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    elapsed_list = []

    while cap.isOpened():
        _, frame = cap.read()
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_buf = PIL.Image.fromarray(im)

        results = engine.classify_with_image(input_buf, top_k=args.top_k)
        elapsed_ms = engine.get_inference_time()

        # Check result.
        if results:
            for i in range(len(results)):
                label = "{0} ({1:.2f})".format(labels[results[i][0]], results[i][1])
                pos = 60 + (i * 30)
                visual.draw_caption(im, (10, pos), label)

        # Calc fps. get_inference_time() reports milliseconds.
        fps = 1000.0 / elapsed_ms
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_fps = 1000.0 / avg_elapsed_ms
            avg_text = " AVG: {0:.2f}ms, {1:.2f}fps".format(avg_elapsed_ms, avg_fps)

        # Display fps.
        fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
        visual.draw_caption(im, (10, 30), fps_text + avg_text)

        # Display.
        cv2.imshow(WINDOW_NAME, im)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the window.
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    # parser.add_argument(
    #     '--label', help='File path of label file.', required=True)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize colormap.
    colormap = label_util.create_pascal_label_colormap()

    # Initialize engine.
    engine = BasicEngine(args.model)

    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                start = time.time()
                rawCapture.truncate(0)

                image = frame.array

                # Create input tensor:
                # camera resolution (640, 480) => input tensor size (513, 513).
                input_buf = Image.fromarray(image)
                input_buf = input_buf.resize((width, height), Image.NEAREST)
                input_tensor = np.asarray(input_buf).flatten()

                # Run inference.
                latency, result = engine.RunInference(input_tensor)

                # Create segmentation map.
                seg_map = np.array(result, dtype=np.uint8)
                seg_map = np.reshape(seg_map, (height, width))
                seg_image = label_util.label_to_color_image(colormap, seg_map)

                # Resize segmentation map: (513, 513) => camera resolution (640, 480).
                seg_image = cv2.resize(
                    seg_image, (resolution_width, resolution_height)
                )
                out_image = image // 2 + seg_image // 2
                im = cv2.cvtColor(out_image, cv2.COLOR_RGB2BGR)

                elapsed = time.time() - start

                # Calc fps.
                fps = 1 / elapsed
                fps_text = "{0:.2f}ms, {1:.2f}fps".format((elapsed * 1000.0), fps)
                visual.draw_caption(im, (10, 30), fps_text)
                latency_text = "RunInference latency: {0:.2f}ms".format(latency)
                visual.draw_caption(im, (10, 60), latency_text)

                # Display image.
                cv2.imshow(WINDOW_NAME, im)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
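# Several scripts in this section call label_util.create_pascal_label_colormap()
# and label_util.label_to_color_image(). A sketch of the widely used DeepLab
# Pascal VOC colormap is shown below for reference; the repository's own
# label_util module may differ in detail.
def create_pascal_label_colormap():
    # Build the 256-entry Pascal VOC colormap using the standard
    # bit-reversal scheme from the DeepLab demo.
    colormap = np.zeros((256, 3), dtype=int)
    ind = np.arange(256, dtype=int)
    for shift in reversed(range(8)):
        for channel in range(3):
            colormap[:, channel] |= ((ind >> channel) & 1) << shift
        ind >>= 3
    return colormap


def label_to_color_image(colormap, label):
    # Map each integer class label in the (H, W) label map to its RGB color.
    return np.asarray(colormap)[label].astype(np.uint8)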
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument(
        "--threshold", help="Threshold to filter results.", type=float, default=0.5
    )
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize interpreter and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[-1]
    colors = visual.random_colors(last_key)

    is_inpaint_mode = False
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                start = time.time()
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size),
                )
                interpreter.invoke()

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold, scale)
                if is_inpaint_mode:
                    mask = np.full((args.height, args.width), 0, dtype=np.uint8)
                    for obj in objects:
                        if labels and obj.id in labels:
                            # Draw a mask rectangle.
                            box = (
                                obj.bbox.xmin,
                                obj.bbox.ymin,
                                obj.bbox.xmax,
                                obj.bbox.ymax,
                            )
                            visual.draw_rectangle(
                                mask, box, (255, 255, 255), thickness=-1
                            )

                    # Image inpainting.
                    dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)
                    # dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_NS)
                else:
                    for obj in objects:
                        if labels and obj.id in labels:
                            label_name = labels[obj.id]
                            caption = "{0}({1:.2f})".format(label_name, obj.score)

                            # Draw a rectangle and caption.
                            box = (
                                obj.bbox.xmin,
                                obj.bbox.ymin,
                                obj.bbox.xmax,
                                obj.bbox.ymax,
                            )
                            visual.draw_rectangle(im, box, colors[obj.id])
                            visual.draw_caption(im, box, caption)
                    dst = im

                # Calc fps.
                elapsed = time.time() - start
                fps = 1 / elapsed

                # Display fps.
                fps_text = "{0:.2f}ms, {1:.2f}fps".format((elapsed * 1000.0), fps)
                visual.draw_caption(dst, (10, 30), fps_text)

                # Display image.
                cv2.imshow(WINDOW_NAME, dst)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord(" "):
                    is_inpaint_mode = not is_inpaint_mode
                    print("Inpaint mode changed:", is_inpaint_mode)
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--top_k", help="Keep top k candidates.", default=3, type=int)
    parser.add_argument(
        "--threshold", help="Threshold to filter results.", default=0.5, type=float
    )
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--videopath", help="File path of video file.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[-1]
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print(args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    elapsed_list = []

    while cap.isOpened():
        _, frame = cap.read()
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_buf = PIL.Image.fromarray(im)

        # Run inference.
        ans = engine.detect_with_image(
            input_buf,
            threshold=args.threshold,
            keep_aspect_ratio=False,
            relative_coord=False,
            top_k=args.top_k,
        )
        elapsed_ms = engine.get_inference_time()

        # Display result.
        if ans:
            for obj in ans:
                label_name = "Unknown"
                if labels:
                    label_name = labels[obj.label_id]
                caption = "{0}({1:.2f})".format(label_name, obj.score)

                # Draw a rectangle and caption.
                box = obj.bounding_box.flatten().tolist()
                visual.draw_rectangle(frame, box, colors[obj.label_id])
                visual.draw_caption(frame, box, caption)

        # Calc fps.
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "{0:.2f}ms".format(elapsed_ms)
        visual.draw_caption(frame, (10, 30), fps_text + avg_text)

        # Display.
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the window.
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--top_k", help="Keep top k candidates.", default=3, type=int)
    parser.add_argument("--threshold", help="Score threshold.", default=0.0, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    with open(args.label, "r") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize interpreter and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size),
                )
                interpreter.invoke()
                results = classify.get_classes(interpreter, args.top_k, args.threshold)
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Check result.
                if results:
                    for i in range(len(results)):
                        label = "{0} ({1:.2f})".format(
                            labels[results[i][0]], results[i][1]
                        )
                        pos = 60 + (i * 30)
                        visual.draw_caption(im, (10, pos), label)

                # Calc fps. elapsed_ms is already in milliseconds.
                fps = 1000.0 / elapsed_ms
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_fps = 1000.0 / avg_elapsed_ms
                    avg_text = " AVG: {0:.2f}ms, {1:.2f}fps".format(
                        avg_elapsed_ms, avg_fps
                    )

                # Display fps.
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display.
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--thread", help="Num threads.", default=2, type=int)
    parser.add_argument("--videopath", help="File path of video file.", default="")
    parser.add_argument("--output", help="File path of result.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize TF-Lite interpreter.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    interpreter.set_num_threads(args.thread)
    _, height, width, channel = interpreter.get_input_details()[0]["shape"]
    print("Interpreter: ", height, width, channel)

    # Initialize colormap.
    random.seed(42)
    colormap = label_util.create_pascal_label_colormap()

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print(args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Input: ", h, w, fps)

    model_file, *device = args.model.split("@")
    model_name = os.path.splitext(os.path.basename(model_file))[0]

    # Output video file.
    # Define the codec and create a VideoWriter object.
    video_writer = None
    if args.output != "":
        fourcc = cv2.VideoWriter_fourcc(*"MP4V")
        video_writer = cv2.VideoWriter(args.output, fourcc, fps, (w, h))

    elapsed_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("VideoCapture read return false.")
            break

        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resize_im = cv2.resize(im, (width, height))

        # Run inference.
        start = time.perf_counter()
        set_input_tensor(interpreter, resize_im)
        interpreter.invoke()
        seg_map = get_output(interpreter)
        inference_time = (time.perf_counter() - start) * 1000

        # Display result.
        seg_map = np.reshape(seg_map, (height, width))
        seg_image = label_util.label_to_color_image(colormap, seg_map)
        seg_image = cv2.resize(seg_image, (w, h))
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) // 2 + seg_image // 2
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)

        # Calc fps.
        elapsed_list.append(inference_time)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "Inference: {0:.2f}ms".format(inference_time)
        display_text = model_name + " " + fps_text + avg_text
        visual.draw_caption(im, (10, 30), display_text)

        # Output video file.
        if video_writer is not None:
            video_writer.write(im)

        # Display.
        cv2.imshow(WINDOW_NAME, im)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the window.
    cap.release()
    if video_writer is not None:
        video_writer.release()
    cv2.destroyAllWindows()
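# The segmentation script above assumes set_input_tensor() (as sketched
# earlier) and a get_output() helper that returns a label map. A plausible
# sketch, assuming the model emits either a label map or per-class logits,
# is shown below; treat it as an illustration rather than the actual helper.
def get_output(interpreter):
    # Fetch the first output tensor and drop the batch axis.
    output_details = interpreter.get_output_details()[0]
    seg = np.squeeze(interpreter.get_tensor(output_details["index"]))
    # Some DeepLab exports emit per-class logits; reduce them to labels.
    if seg.ndim == 3:
        seg = np.argmax(seg, axis=-1)
    return seg.astype(np.uint8)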
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument(
        "--nano", help="Works with Jetson Nano and Pi Camera.", action="store_true"
    )
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize colormap.
    colormap = label_util.create_pascal_label_colormap()

    # Initialize engine.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    if args.nano:
        GST_STR = "nvarguscamerasrc \
            ! video/x-raw(memory:NVMM), width={0:d}, height={1:d}, format=(string)NV12, framerate=(fraction)30/1 \
            ! nvvidconv flip-method=2 ! video/x-raw, width=(int){2:d}, height=(int){3:d}, format=(string)BGRx \
            ! videoconvert \
            ! appsink".format(args.width, args.height, args.width, args.height)
        cap = cv2.VideoCapture(GST_STR, cv2.CAP_GSTREAMER)
    else:
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    while cap.isOpened():
        _, frame = cap.read()

        start = time.perf_counter()

        # Create input tensor:
        # camera resolution => input tensor size (513, 513).
        input_buf = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        _, scale = common.set_resized_input(
            interpreter,
            (cap_width, cap_height),
            lambda size: cv2.resize(input_buf, size),
        )

        # Run inference.
        interpreter.invoke()
        elapsed_ms = (time.perf_counter() - start) * 1000

        # Create segmentation map.
        result = segment.get_output(interpreter)
        seg_map = result[:height, :width]
        seg_image = label_util.label_to_color_image(colormap, seg_map)

        # Resize segmentation map: (513, 513) => camera resolution.
        seg_image = cv2.resize(seg_image, (args.width, args.height))
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) // 2 + seg_image // 2
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)

        # Calc fps.
        fps = 1000.0 / elapsed_ms
        fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
        visual.draw_caption(im, (10, 30), fps_text)

        # Display image.
        cv2.imshow(WINDOW_NAME, im)
        key = cv2.waitKey(10) & 0xFF
        if key == ord("q"):
            break

        # When not using the Jetson camera pipeline, discard buffered frames.
        if not args.nano:
            for i in range(10):
                ret, frame = cap.read()

    # When everything is done, release the window.
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize colormap.
    colormap = label_util.create_pascal_label_colormap()

    # Initialize engine.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                start = time.perf_counter()

                # Create input tensor:
                # camera resolution (640, 480) => input tensor size (513, 513).
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size),
                )

                # Run inference.
                interpreter.invoke()
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Create segmentation map.
                result = segment.get_output(interpreter)
                seg_map = result[:height, :width]
                seg_image = label_util.label_to_color_image(colormap, seg_map)

                # Resize segmentation map: (513, 513) => camera resolution (640, 480).
                seg_image = cv2.resize(
                    seg_image, (resolution_width, resolution_height)
                )
                out_image = image // 2 + seg_image // 2
                im = cv2.cvtColor(out_image, cv2.COLOR_RGB2BGR)

                # Calc fps.
                fps = 1000.0 / elapsed_ms
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
                visual.draw_caption(im, (10, 30), fps_text)

                # Display image.
                cv2.imshow(WINDOW_NAME, im)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument(
        "--threshold", help="Threshold to filter results.", default=0.5, type=float
    )
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--videopath", help="File path of video file.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize interpreter and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[-1]
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    elapsed_list = []

    while cap.isOpened():
        _, frame = cap.read()
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run inference.
        start = time.perf_counter()
        _, scale = common.set_resized_input(
            interpreter, (cap_width, cap_height), lambda size: cv2.resize(im, size)
        )
        interpreter.invoke()
        elapsed_ms = (time.perf_counter() - start) * 1000

        # Display result.
        objects = detect.get_objects(interpreter, args.threshold, scale)
        for obj in objects:
            label_name = labels.get(obj.id, "Unknown") if labels else "Unknown"
            caption = "{0}({1:.2f})".format(label_name, obj.score)

            # Draw a rectangle and caption.
            box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax)
            visual.draw_rectangle(frame, box, colors[obj.id])
            visual.draw_caption(frame, box, caption)

        # Calc fps.
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "{0:.2f}ms".format(elapsed_ms)
        visual.draw_caption(frame, (10, 30), fps_text + avg_text)

        # Display.
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the window.
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of TFLite model.', required=True)
    parser.add_argument('--width', help='Resolution width.', default=640, type=int)
    parser.add_argument('--height', help='Resolution height.', default=480, type=int)
    parser.add_argument(
        '--nano', help='Works with Jetson Nano and Pi Camera.', action='store_true'
    )
    # parser.add_argument(
    #     '--label', help='File path of label file.', required=True)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize colormap.
    colormap = label_util.create_pascal_label_colormap()

    # Initialize engine.
    engine = BasicEngine(args.model)
    _, height, width, _ = engine.get_input_tensor_shape()

    if args.nano:
        GST_STR = 'nvarguscamerasrc \
            ! video/x-raw(memory:NVMM), width={0:d}, height={1:d}, format=(string)NV12, framerate=(fraction)30/1 \
            ! nvvidconv flip-method=2 ! video/x-raw, width=(int){2:d}, height=(int){3:d}, format=(string)BGRx \
            ! videoconvert \
            ! appsink'.format(args.width, args.height, args.width, args.height)
        cap = cv2.VideoCapture(GST_STR, cv2.CAP_GSTREAMER)
    else:
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

    while cap.isOpened():
        _, frame = cap.read()

        start = time.time()

        # Create input tensor:
        # camera resolution => input tensor size (513, 513).
        input_buf = cv2.resize(frame, (width, height))
        input_buf = cv2.cvtColor(input_buf, cv2.COLOR_BGR2RGB)
        input_tensor = input_buf.flatten()

        # Run inference.
        latency, result = engine.RunInference(input_tensor)

        # Create segmentation map.
        seg_map = np.array(result, dtype=np.uint8)
        seg_map = np.reshape(seg_map, (height, width))
        seg_image = label_util.label_to_color_image(colormap, seg_map)

        # Resize segmentation map: (513, 513) => camera resolution.
        seg_image = cv2.resize(seg_image, (args.width, args.height))
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) // 2 + seg_image // 2
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)

        elapsed = time.time() - start

        # Calc fps.
        fps = 1 / elapsed
        fps_text = '{0:.2f}ms, {1:.2f}fps'.format((elapsed * 1000.0), fps)
        visual.draw_caption(im, (10, 30), fps_text)
        latency_text = 'RunInference latency: {0:.2f}ms'.format(latency)
        visual.draw_caption(im, (10, 60), latency_text)

        # Display image.
        cv2.imshow(WINDOW_NAME, im)
        key = cv2.waitKey(10) & 0xFF
        if key == ord('q'):
            break

        # When not using the Jetson camera pipeline, discard buffered frames.
        if not args.nano:
            for i in range(10):
                ret, frame = cap.read()

    # When everything is done, release the window.
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of TFLite model.', required=True)
    parser.add_argument('--label', help='File path of label file.', required=True)
    parser.add_argument('--top_k', help='Keep top k candidates.', default=3, type=int)
    parser.add_argument(
        '--threshold', help='Threshold to filter results.', default=0.5, type=float
    )
    parser.add_argument('--width', help='Resolution width.', default=640, type=int)
    parser.add_argument('--height', help='Resolution height.', default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[-1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, height, width, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format='rgb', use_video_port=True
            ):
                rawCapture.truncate(0)

                # input_buf = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                start = time.time()
                ans = engine.DetectWithImage(
                    input_buf,
                    threshold=args.threshold,
                    keep_aspect_ratio=False,
                    relative_coord=False,
                    top_k=args.top_k,
                )
                # ans = engine.DetectWithInputTensor(input_buf, threshold=0.05,
                #     keep_aspect_ratio=False, relative_coord=False, top_k=10)
                elapsed = time.time() - start

                # Display result.
                if ans:
                    for obj in ans:
                        label_name = 'Unknown'
                        if labels:
                            label_name = labels[obj.label_id]
                        caption = '{0}({1:.2f})'.format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = obj.bounding_box.flatten().tolist()
                        visual.draw_rectangle(im, box, colors[obj.label_id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                fps = 1 / elapsed
                elapsed_list.append(elapsed)
                avg_text = ''
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed = np.mean(elapsed_list)
                    avg_fps = 1 / avg_elapsed
                    avg_text = ' AVG: {0:.2f}ms, {1:.2f}fps'.format(
                        (avg_elapsed * 1000.0), avg_fps
                    )

                # Display fps.
                fps_text = '{0:.2f}ms, {1:.2f}fps'.format((elapsed * 1000.0), fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display.
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of TFLite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--top_k", help="Keep top k candidates.", default=3, type=int)
    parser.add_argument(
        "--threshold", help="Threshold to filter results.", type=float, default=0.5
    )
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[-1]
    colors = visual.random_colors(last_key)

    is_inpaint_mode = False
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, height, width, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                start = time.time()
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                ans = engine.DetectWithImage(
                    input_buf,
                    threshold=args.threshold,
                    keep_aspect_ratio=False,
                    relative_coord=False,
                    top_k=args.top_k,
                )

                # Display result.
                if is_inpaint_mode:
                    mask = np.full((args.height, args.width), 0, dtype=np.uint8)
                    if ans:
                        for obj in ans:
                            if labels and obj.label_id in labels:
                                # Draw a mask rectangle.
                                box = obj.bounding_box.flatten().tolist()
                                visual.draw_rectangle(
                                    mask, box, (255, 255, 255), thickness=-1
                                )

                    # Image inpainting.
                    dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)
                    # dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_NS)
                else:
                    if ans:
                        for obj in ans:
                            if labels and obj.label_id in labels:
                                label_name = labels[obj.label_id]
                                caption = "{0}({1:.2f})".format(label_name, obj.score)

                                # Draw a rectangle and caption.
                                box = obj.bounding_box.flatten().tolist()
                                visual.draw_rectangle(im, box, colors[obj.label_id])
                                visual.draw_caption(im, box, caption)
                    dst = im

                # Calc fps.
                elapsed = time.time() - start
                fps = 1 / elapsed

                # Display fps.
                fps_text = "{0:.2f}ms, {1:.2f}fps".format((elapsed * 1000.0), fps)
                visual.draw_caption(dst, (10, 30), fps_text)

                # Display image.
                cv2.imshow(WINDOW_NAME, dst)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord(" "):
                    is_inpaint_mode = not is_inpaint_mode
                    print("Inpaint mode changed:", is_inpaint_mode)
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()