def run(self):
    """Worker loop, executed on this processor's own thread.

    Waits for the owner to signal that a frame has been written into
    self.stream, runs Edge TPU detection on the raw frame bytes, then
    rewinds the stream and returns this processor to the shared pool.
    """
    global done, rawCapture
    while not self.terminated:
        # Block (with a 1-second timeout) until a frame is signalled.
        if not self.event.wait(1):
            continue
        try:
            #self.stream.seek(0)
            self.stream.readinto(rawCapture)
            # Read the image and do some processing on it
            #Image.open(self.stream)
            #...
            #...
            # Expose the captured bytes as a flat uint8 tensor and run
            # detection on the shared Edge TPU engine.
            self.frame_buf_val = np.frombuffer(self.stream.getvalue(),
                                               dtype=np.uint8)
            self.output = engine.DetectWithInputTensor(self.frame_buf_val,
                                                       top_k=10)
            # Set done to True if you want the script to terminate
            # at some point
            #done=True
        finally:
            # Reset the stream and event so the next frame can land.
            self.stream.seek(0)
            self.stream.truncate()
            self.event.clear()
            # Return ourselves to the pool
            with lock:
                pool.append(self)
# pi3d display-loop fragment (flattened onto one physical line by a paste).
# It relies on DISPLAY, fps_txt, ms_txt, engine, g_input, bbox, mdl_dims and
# the X_IX/Y_IX, X_OFF/Y_OFF constants defined elsewhere in the original
# script — none are visible in this chunk.
# NOTE(review): the `#if new_pic:` guard ahead of DetectWithInputTensor is
# commented out, so inference runs on every loop pass even when no new
# picture has arrived — confirm that is intentional.
time.sleep(0.1) while DISPLAY.loop_running(): fps_txt.draw() ms_txt.draw() ms = str(elapsed_ms*1000) ms_txt.quick_change(ms) i += 1 if i > N: tm = time.time() fps = "{:5.1f}FPS".format(i / (tm - last_tm)) fps_txt.quick_change(fps) i = 0 last_tm = tm #if new_pic: results = engine.DetectWithInputTensor(g_input, top_k=4) if new_pic: if results: num_obj = 0 for obj in results: num_obj = num_obj + 1 buf = bbox.buf[0] # alias for brevity below buf.array_buffer[:,:3] = 0.0; for j, obj in enumerate(results): coords = (obj.bounding_box - 0.5) * [[1.0, -1.0]] * mdl_dims # broadcasting will fix the arrays size differences score = round(obj.score,2) ix = 8 * j buf.array_buffer[ix:(ix + 8), 0] = coords[X_IX, 0] + 2 * X_OFF buf.array_buffer[ix:(ix + 8), 1] = coords[Y_IX, 1] + 2 * Y_OFF buf.re_init(); # new_pic = False
# Capture/inference loop fragment, truncated mid `for obj in results:` — the
# drawing code that follows lies outside this chunk.
# NOTE(review): `camera.resolution * 3` repeats the (w, h) tuple three times
# (yielding a 6-tuple) rather than tripling the byte count — presumably
# w * h * 3, or simply the resolution, was meant for PiRGBArray's `size`;
# verify against the PiRGBArray API.
# NOTE(review): `input` shadows the Python builtin of the same name.
rgb = PiRGBArray(camera, size=camera.resolution * 3) _, width, height, channels = engine.get_input_tensor_shape() camera.start_preview(fullscreen=False, layer=0, window=(preview_mid_X, preview_mid_Y, preview_W, preview_H)) try: while DISPLAY.loop_running(): stream = io.BytesIO() camera.capture(stream, use_video_port=True, format='bgr') stream.truncate() stream.seek(0) input = np.frombuffer(stream.getvalue(), dtype=np.uint8) #stream.close() start_ms = time.time() results = engine.DetectWithInputTensor(input, top_k=max_obj) elapsed_ms = time.time() - start_ms ms = str(int(elapsed_ms * 1000)) + "ms" ms_txt.draw() ms_txt.quick_change(ms) fps_txt.draw() i += 1 if i > N: tm = time.time() fps = "{:6.2f}FPS".format(i / (tm - last_tm)) fps_txt.quick_change(fps) i = 0 last_tm = tm if results: num_obj = 0 for obj in results:
# pygame preview loop fragment, truncated right after measuring the class
# label's rendered width; the code that actually draws labels and boxes is
# outside this chunk.
# NOTE(review): `new_pic`, `frame_buf_val`, `engine`, `labels`, `thresh`,
# `max_obj`, `pycam`, `fnt` and `mdl_dims` all come from the surrounding
# script and are not defined here.
fps = "00.0 fps" N = 10 ms = "00" while True: print("started the loop of draemes") screen = pygame.display.get_surface( ) #get the surface of the current active display resized_x, resized_y = mdl_dims, mdl_dims #screen.get_width(), screen.get_height() img = pycam.get_image() img = pygame.transform.scale(img, (resized_x, resized_y)) screen.blit(img, (0, 0)) if new_pic: start_ms = time.time() results = engine.DetectWithInputTensor(frame_buf_val, threshold=thresh, top_k=max_obj) elapsed_ms = time.time() - start_ms #pygame.surfarray.blit_array(screen, img_arr) i += 1 if results: num_obj = 0 for obj in results: num_obj = num_obj + 1 for obj in results: bbox = obj.bounding_box.flatten().tolist() label_id = int(round(obj.label_id, 1)) class_label = "%s" % (labels[label_id]) fnt_class_label = fnt.render(class_label, True, (255, 255, 255)) fnt_class_label_width = fnt_class_label.get_rect().width
# Object-detection demo main(): parses CLI args, loads an Edge TPU
# DetectionEngine, grabs frames from the first pygame camera, and draws
# detections plus FPS/latency overlays until ESC is pressed.
# NOTE(review): `if len(sys.argv[0:]) == 0` can never be true — sys.argv
# always contains at least the script name — so the help-print branch is
# dead code; `len(sys.argv) == 1` was probably intended.
# NOTE(review): the bare `exit` in the "No camera found!" branch is only a
# name reference and does not terminate the script; execution then reaches
# `pycam.start()` with `pycam` unbound. `sys.exit(1)` was likely meant.
# (Code left byte-identical: this chunk was flattened onto three physical
# lines and at least one statement is split across them, so a safe reformat
# is not possible from this view.)
def main(): parser = argparse.ArgumentParser() parser.add_argument( '--model', help='File path of Tflite model.', default= "/home/rock64/models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite" ) parser.add_argument('--labels', help="Path of the labels file.", default="/home/rock64/detection/coco_labels.txt") parser.add_argument('--dims', help='Model input dimension', default=320) parser.add_argument('--max_obj', help='Maximum objects detected [>= 1], default=10', default=10) parser.add_argument('--thresh', help='Threshold confidence [0.1-1.0], default=0.3', default=0.3) parser.add_argument('--video_off', help='Video display off, for increased FPS', action='store_true', default=False) parser.add_argument( '--cam_w', help= 'Set camera resolution, examples: 96, 128, 256, 352, 384, 480, 640, 1920', default=320) parser.add_argument( '--cam_h', help= 'Set camera resolution, examples: 96, 128, 256, 352, 384, 480, 640, 1920', default=320) if len(sys.argv[0:]) == 0: parser.print_help() #parser.print_usage() # for just the usage line parser.exit() args = parser.parse_args() if args.labels: with open(args.labels, 'r') as f: pairs = (l.strip().split(maxsplit=1) for l in f.readlines()) labels = dict((int(k), v) for k, v in pairs) else: lbl_input = input("Type label name for this single class model:") if lbl_input: labels = {0: lbl_input} else: labels = {0: 'object'} mdl_dims = int(args.dims) max_obj = int(args.max_obj) cam_w = int(args.cam_w) cam_h = int(args.cam_h) thresh = float(args.thresh) video_off = args.video_off engine = edgetpu.detection.engine.DetectionEngine(args.model) pygame.init() pygame.camera.init() if not video_off: screen = pygame.display.set_mode( (cam_w, cam_h), pygame.HWSURFACE + pygame.HWACCEL + pygame.DOUBLEBUF) #+pygame.ASYNCBLIT,16) pygame.display.set_caption('Object Detection') pygame.font.init() fnt_sz = 18 fnt = pygame.font.SysFont('Arial', fnt_sz) camlist = pygame.camera.list_cameras() if camlist: pycam = pygame.camera.Camera(camlist[0], 
(cam_w, cam_h)) else: print("No camera found!") exit pycam.start() time.sleep(1) x1 = x2 = y1 = y2 = i = j = fps_last = fps_total = 0 start_ms = last_tm = elapsed_ms = time.time() results = None fps_avg = "00.0" N = 10 ms = "00" scale_req = False if (cam_w != mdl_dims) or (cam_h != mdl_dims): scale_req = True #if not video_off : # screen = pygame.display.get_surface() #get the surface of the current active display # screen_x,screen_y = screen.get_width(), screen.get_height() img = pycam.get_image() img = pygame.transform.scale(img, (mdl_dims, mdl_dims)) while True: #if not video_off : #screen = pygame.display.get_surface() #get the surface of the current active display #scr_w,scr_h = screen.get_width(), screen.get_height() if pycam.query_image(): #grab image from camera, when available img = pycam.get_image() if not video_off: if scale_req: img = pygame.transform.scale(img, (cam_w, cam_h)) screen.blit(img, (0, 0)) detect_img = pygame.transform.scale( img, (mdl_dims, mdl_dims)) else: screen.blit(img, (0, 0)) detect_img = img else: if scale_req: detect_img = pygame.transform.scale( img, (mdl_dims, mdl_dims)) else: detect_img = img img_arr = pygame.surfarray.pixels3d(detect_img) img_arr = np.swapaxes(img_arr, 0, 1) img_arr = np.ascontiguousarray(img_arr) frame = io.BytesIO(img_arr) frame_buf_val = np.frombuffer(frame.getvalue(), dtype=np.uint8) start_ms = time.time() results = engine.DetectWithInputTensor(frame_buf_val, threshold=thresh, top_k=max_obj) elapsed_ms = time.time() - start_ms i += 1 if results: obj_cnt = 0 obj_id = 0 for obj in results: obj_cnt += 1 for obj in results: obj_id += 1 bbox = obj.bounding_box.flatten().tolist() label_id = int(round(obj.label_id, 1)) class_label = "%s" % (labels[label_id]) score = round(obj.score, 2) x1 = round(bbox[0] * cam_w) y1 = round(bbox[1] * cam_h) x2 = round(bbox[2] * cam_w) y2 = round(bbox[3] * cam_h) rect_width = x2 - x1 rect_height = y2 - y1 class_score = "%.2f" % (score) if not video_off: fnt_class_score = 
fnt.render(class_score, True, (0, 255, 255)) fnt_class_score_width = fnt_class_score.get_rect( ).width screen.blit(fnt_class_score, (x2 - fnt_class_score_width, y1 - fnt_sz)) fnt_class_label = fnt.render(class_label, True, (255, 255, 255)) fnt_class_label_width = fnt_class_label.get_rect( ).width screen.blit(fnt_class_label, (x1, y1 - fnt_sz)) fnt_ms = fnt.render(ms, True, (255, 255, 255)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms, ((cam_w / 2) - (fnt_ms_width / 2), 0)) bbox_rect = pygame.draw.rect( screen, (0, 255, 0), (x1, y1, rect_width, rect_height), 4) if i > N: ms = "(%d%s%d) %s%.2fms" % (obj_cnt, "/", max_obj, "objects detected in ", elapsed_ms * 1000) print(ms) output = "%s%d %s%s %s%s %s%d %s%d %s%d %s%d %s" % ( "id:", obj_id, "class:", class_label, "conf:", class_score, "x1:", x1, "y1:", y1, "x2:", x2, "y2:", y2, fps_avg) print(output) else: if i > N: ms = "%s %.2fms" % ("No objects detected in", elapsed_ms * 1000) print(ms) if not video_off: fnt_ms = fnt.render(ms, True, (255, 0, 0)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms, ((cam_w / 2) - (fnt_ms_width / 2), 0)) if i > N: tm = time.time() fps_last = i / (tm - last_tm) if j < 5: j += 1 fps_total = fps_total + fps_last else: fps_avg = "AVG_FPS:{:5.1f} ".format(fps_total / 5) fps_total = 0 j = 0 i = 0 last_tm = tm if not video_off: fps_thresh = fps_avg + " thresh:" + str(thresh) fps_fnt = fnt.render(fps_thresh, True, (255, 255, 0)) fps_width = fps_fnt.get_rect().width screen.blit(fps_fnt, ((cam_w / 2) - (fps_width / 2), 20)) for event in pygame.event.get(): keys = pygame.key.get_pressed() if (keys[pygame.K_ESCAPE] == 1): pycam.stop() pygame.display.quit() sys.exit() #elif event.type == pygame.VIDEORESIZE and not video_off: # screen = pygame.display.set_mode((event.w,event.h),pygame.RESIZABLE) if not video_off: pygame.display.update()
# Object-detection demo main(): argparse → pygame camera → Edge TPU loop
# with resizable display and per-object console output.
# NOTE(review): `if len(sys.argv[0:]) == 0` is never true — sys.argv always
# holds at least the script name — so the help-print branch is dead code.
# NOTE(review): the bare `exit` after "No camera found!" is only a name
# reference and does not terminate the program; `pycam` then remains unbound
# when the main loop calls `pycam.get_image()`.
# (Left byte-identical: the chunk is flattened and statements are split
# across the physical lines, so a safe reformat is not possible here.)
def main(): parser = argparse.ArgumentParser() parser.add_argument('--model', help='File path of Tflite model.', required=True) parser.add_argument( '--labels', help='labels file path OR no arg will prompt for label name', required=False) parser.add_argument('--dims', help='Model input dimension', required=True) parser.add_argument('--max_obj', help='Maximum objects detected [>= 1], default=1', default=1, required=False) parser.add_argument('--thresh', help='Threshold confidence [0.1-1.0], default=0.3', default=0.3, required=False) parser.add_argument('--video_off', help='Video display on/off, for increased FPS', action='store_true', required=False) parser.add_argument( '--cam_res_x', help= 'Set camera X resolution, examples: 96, 128, 256, 352, 384, 480, 640, 1920', default=320, required=False) parser.add_argument( '--cam_res_y', help= 'Set camera Y resolution, examples: 96, 128, 256, 352, 384, 480, 640, 1920', default=320, required=False) if len(sys.argv[0:]) == 0: parser.print_help() #parser.print_usage() # for just the usage line parser.exit() args = parser.parse_args() if args.labels: with open(args.labels, 'r') as f: pairs = (l.strip().split(maxsplit=1) for l in f.readlines()) labels = dict((int(k), v) for k, v in pairs) else: lbl_input = input("Type label name for this single object model:") if lbl_input: labels = {0: lbl_input} else: labels = {0: 'object'} mdl_dims = int(args.dims) if args.max_obj: max_obj = int(args.max_obj) if max_obj < 1: max_obj = 1 if args.thresh: thresh = float(args.thresh) if thresh < 0.1 or thresh > 1.0: thresh = 0.3 video_off = False if args.video_off: video_off = True if args.cam_res_x: cam_res_x = int(args.cam_res_x) else: cam_res_x = 320 if args.cam_res_y: cam_res_y = int(args.cam_res_y) else: cam_res_y = 320 #c = threading.Condition() #frame_buf_val = None engine = edgetpu.detection.engine.DetectionEngine(args.model) pygame.init() pygame.camera.init() camlist = pygame.camera.list_cameras() if camlist: pycam = 
pygame.camera.Camera(camlist[0], (cam_res_x, cam_res_y)) pycam.start() time.sleep(2) else: print("No camera found!") exit if video_off == False: screen = pygame.display.set_mode((cam_res_x, cam_res_y), pygame.RESIZABLE) pygame.display.set_caption('Object Detection') screen = pygame.display.get_surface( ) #get the surface of the current active display resized_x, resized_y = size = screen.get_width(), screen.get_height() pygame.font.init() fnt_sz = 18 fnt = pygame.font.SysFont('Arial', fnt_sz) lbl_fnt_sz = 10 lbl_fnt = pygame.font.SysFont('Arial', lbl_fnt_sz) x1 = x2 = y1 = y2 = 0 last_tm = time.time() start_ms = time.time() elapsed_ms = time.time() i = 0 results = None fps = "00.0 fps" N = 10 ms = "00" #py_thread = PyThread().start() #detection_thread = Detection(args.model).start() while True: start_ms = time.time() #if pycam.query_image(): img = pycam.get_image() #img = pycam.get_image() if video_off == False: img = pygame.transform.scale(img, (resized_x, resized_y)) screen.blit(img, (0, 0)) detect_img = pygame.transform.scale(img, (mdl_dims, mdl_dims)) img_arr = pygame.surfarray.pixels3d(img) img_arr = np.swapaxes(img_arr, 0, 1) img_arr = np.ascontiguousarray(img_arr) frame = io.BytesIO(img_arr) frame_buf_val = np.frombuffer(frame.getvalue(), dtype=np.uint8) #print(frame_buf_val) #start_ms = time.time() #results = detection_thread.get_results() results = engine.DetectWithInputTensor(frame_buf_val, threshold=thresh, top_k=max_obj) #elapsed_ms = time.time() - start_ms #pygame.surfarray.blit_array(screen, img_arr) if i > 5: tm = time.time() fps = "fps:{:5.1f} ".format(i / (tm - last_tm)) i = 0 last_tm = tm i += 1 if results: num_obj = 0 for obj in results: bbox = obj.bounding_box.flatten().tolist() label_id = int(round(obj.label_id, 1)) class_label = "%s" % (labels[label_id]) score = round(obj.score, 2) class_score = "%.2f" % (score) if video_off == False: x1 = round(bbox[0] * resized_x) y1 = round(bbox[1] * resized_y) x2 = round(bbox[2] * resized_x) y2 = 
round(bbox[3] * resized_y) rect_width = x2 - x1 rect_height = y2 - y1 fnt_class_label = lbl_fnt.render(class_label, True, (255, 255, 255)) fnt_class_label_width = fnt_class_label.get_rect().width screen.blit(fnt_class_label, (x1, y1 - lbl_fnt_sz)) fnt_class_score = lbl_fnt.render(class_score, True, (0, 255, 255)) fnt_class_score_width = fnt_class_score.get_rect().width screen.blit(fnt_class_score, (x2 - fnt_class_score_width, y1 - lbl_fnt_sz)) bbox_rect = pygame.draw.rect( screen, (0, 255, 0), (x1, y1, rect_width, rect_height), 4) results_line = "%d, %s, %s, %d,%d,%d,%d" % ( num_obj, class_label, class_score, x1, y1, x2, y2) print(results_line) elapsed_ms = time.time() - start_ms if i > N: ms = "(%d%s%d) %s%.2fms" % (num_obj, "/", max_obj, "objects detected in ", elapsed_ms * 1000) fnt_ms = fnt.render(ms, True, (255, 255, 255)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms, ((resized_x / 2) - (fnt_ms_width / 2), 0)) else: #video_off == True x1 = round(bbox[0] * mdl_dims) y1 = round(bbox[1] * mdl_dims) x2 = round(bbox[2] * mdl_dims) y2 = round(bbox[3] * mdl_dims) results_line = "%s, %d, %s, %s, %d,%d,%d,%d" % ( fps, num_obj, class_label, class_score, x1, y1, x2, y2) print(results_line) num_obj = num_obj + 1 else: if video_off == False: elapsed_ms = time.time() - start_ms if i > N: ms = "%s %.2fms" % ("No objects detected in", elapsed_ms * 1000) fnt_ms = fnt.render(ms, True, (255, 0, 0)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms, ((resized_x / 2) - (fnt_ms_width / 2), 0)) else: print("No results") if video_off == False: fps_thresh = fps + " thresh:" + str(thresh) fps_fnt = fnt.render(fps_thresh, True, (255, 255, 0)) fps_width = fps_fnt.get_rect().width screen.blit(fps_fnt, ((resized_x / 2) - (fps_width / 2), 20)) pygame.display.update() for event in pygame.event.get(): keys = pygame.key.get_pressed() if (keys[pygame.K_ESCAPE] == 1): #pycam.stop() #pygame.display.quit() pygame.quit() sys.exit() elif event.type == pygame.VIDEORESIZE: screen 
= pygame.display.set_mode((event.w, event.h), pygame.RESIZABLE)
# Face-detection demo main() using picamera + an Edge TPU DetectionEngine,
# rendering frames and boxes via pygame.
# NOTE(review): `rgbCapture` is assigned on `strm_thread`, but the capture
# loop reads `stream.rgbCapture` from the per-iteration PiRGBArray — that
# attribute is never set on the stream object, so the readinto/frombuffer
# path looks broken; confirm which buffer was intended.
# NOTE(review): a PiVideoStream worker thread AND a separate
# picamera.PiCamera() are opened at the same time — the Pi camera normally
# cannot be owned by both; verify this even initializes.
# NOTE(review): `PiVideoStream` is defined elsewhere in the original script.
def main(): parser = argparse.ArgumentParser() parser.add_argument( '--model', help='File path of Tflite model.', required=True) parser.add_argument( '--dims', help='Model input dimension', required=True) args = parser.parse_args() #Set all input params equal to the input dimensions expected by the model mdl_dims = int(args.dims) #dims must be a factor of 32 for picamera resolution to work #Set max num of objects you want to detect per frame max_obj = 10 engine = edgetpu.detection.engine.DetectionEngine(args.model) pygame.init() pygame.display.set_caption('Face Detection') screen = pygame.display.set_mode((mdl_dims, mdl_dims), pygame.DOUBLEBUF|pygame.HWSURFACE) pygame.font.init() fnt_sz = 18 myfont = pygame.font.SysFont('Arial', fnt_sz) camera = picamera.PiCamera() strm_thread = PiVideoStream().start() strm_thread.rgbCapture = bytearray(strm_thread.camera.resolution[0] * strm_thread.camera.resolution[1] * 3) #Set camera resolution equal to model dims camera.resolution = (mdl_dims, mdl_dims) #rgb = bytearray(camera.resolution[0] * camera.resolution[1] * 3) camera.framerate = 30 _, width, height, channels = engine.get_input_tensor_shape() x1, x2, x3, x4, x5 = 0, 50, 50, 0, 0 y1, y2, y3, y4, y5 = 50, 50, 0, 0, 50 z = 5 last_tm = time.time() i = 0 exitFlag = True while(exitFlag): for event in pygame.event.get(): keys = pygame.key.get_pressed() if(keys[pygame.K_ESCAPE] == 1): exitFlag = False with picamera.array.PiRGBArray(camera, size=(mdl_dims, mdl_dims)) as stream: #stream = io.BytesIO() start_ms = time.time() camera.capture(stream, use_video_port=True, format='rgb') elapsed_ms = time.time() - start_ms stream.seek(0) stream.readinto(stream.rgbCapture) #stream.truncate() #needed?? 
img = pygame.image.frombuffer(stream.rgbCapture[0: (camera.resolution[0] * camera.resolution[1] * 3)], camera.resolution, 'RGB') input = np.frombuffer(stream.getvalue(), dtype=np.uint8) #Inference results = engine.DetectWithInputTensor(input, top_k=max_obj) stream.close() if img: screen.blit(img, (0,0)) if results: num_obj = 0 for obj in results: num_obj = num_obj + 1 for obj in results: bbox = obj.bounding_box.flatten().tolist() score = round(obj.score,2) x1 = round(bbox[0] * mdl_dims) y1 = round(bbox[1] * mdl_dims) x2 = round(bbox[2] * mdl_dims) y2 = round(bbox[3] * mdl_dims) rect_width = x2 - x1 rect_height = y2 - y1 class_score = "%.2f" % (score) ms = "(%d) %s%.2fms" % (num_obj, "faces detected in ", elapsed_ms*1000) fnt_class_score = myfont.render(class_score, True, (0,0,255)) fnt_class_score_width = fnt_class_score.get_rect().width screen.blit(fnt_class_score,(x1, y1-fnt_sz)) fnt_ms = myfont.render(ms, True, (255,255,255)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms,((mdl_dims / 2) - (fnt_ms_width / 2), 0)) bbox_rect = pygame.draw.rect(screen, (0,0,255), (x1, y1, rect_width, rect_height), 2) #pygame.display.update(bbox_rect) else: elapsed_ms = time.time() - start_ms ms = "%s %.2fms" % ("No faces detected in", elapsed_ms*1000) fnt_ms = myfont.render(ms, True, (255,0,0)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms,((mdl_dims / 2) - (fnt_ms_width / 2), 0)) pygame.display.update() pygame.display.quit()
# Capture/render loop for the threaded-ImageProcessor (capture_sequence)
# recipe: spins up a pool of processors, captures a sequence, then draws
# face boxes and timing text with pygame.
# NOTE(review): `engine.DetectWithInputTensor(input, top_k=max_obj)` passes
# the Python builtin `input` function — no tensor named `input` is assigned
# anywhere in this function, so this call cannot succeed as written; the
# frame bytes in `rgb` (or a processor's frame_buf_val) were presumably
# intended. TODO confirm against the original recipe.
# NOTE(review): `streams()` and `ImageProcessor` are defined elsewhere in
# the original script and are not visible in this chunk.
def start_capture(): # has to be in yet another thread as blocking global mdl_dims, pool, engine, results, screen, start_ms, elapsed_ms, fnt_sz, preview_mid_X, preview_mid_Y, camera, rgb, max_obj x1, x2, x3, x4, x5 = 0, 50, 50, 0, 0 y1, y2, y3, y4, y5 = 50, 50, 0, 0, 50 z = 5 pygame.init() pygame.display.set_caption('Face Detection') screen = pygame.display.set_mode((mdl_dims, mdl_dims), pygame.DOUBLEBUF | pygame.HWSURFACE) pygame.font.init() fnt_sz = 18 myfont = pygame.font.SysFont('Arial', fnt_sz) x1, x2, x3, x4, x5 = 0, 50, 50, 0, 0 y1, y2, y3, y4, y5 = 50, 50, 0, 0, 50 z = 5 last_tm = time.time() i = 0 exitFlag = True while (exitFlag): for event in pygame.event.get(): keys = pygame.key.get_pressed() if (keys[pygame.K_ESCAPE] == 1): exitFlag = False with picamera.PiCamera() as camera: pool = [ImageProcessor() for i in range(4)] camera.resolution = (mdl_dims, mdl_dims) rgb = bytearray(camera.resolution[0] * camera.resolution[1] * 3) camera.framerate = 24 with picamera.array.PiRGBArray(camera, size=(mdl_dims, mdl_dims)) as stream: start_ms = time.time() camera.capture_sequence(streams(), format='rgb', use_video_port=True) elapsed_ms = time.time() - start_ms #Inference results = engine.DetectWithInputTensor(input, top_k=max_obj) img = pygame.image.frombuffer( rgb[0:(camera.resolution[0] * camera.resolution[1] * 3)], camera.resolution, 'RGB') if img: screen.blit(img, (0, 0)) if results: num_obj = 0 for obj in results: num_obj = num_obj + 1 for obj in results: bbox = obj.bounding_box.flatten().tolist() score = round(obj.score, 2) x1 = round(bbox[0] * mdl_dims) y1 = round(bbox[1] * mdl_dims) x2 = round(bbox[2] * mdl_dims) y2 = round(bbox[3] * mdl_dims) rect_width = x2 - x1 rect_height = y2 - y1 class_score = "%.2f" % (score) ms = "(%d) %s%.2fms" % (num_obj, "faces detected in ", elapsed_ms * 1000) fnt_class_score = myfont.render( class_score, True, (0, 0, 255)) fnt_class_score_width = fnt_class_score.get_rect( ).width screen.blit(fnt_class_score, (x1, y1 - fnt_sz)) 
fnt_ms = myfont.render(ms, True, (255, 255, 255)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms, ((mdl_dims / 2) - (fnt_ms_width / 2), 0)) bbox_rect = pygame.draw.rect( screen, (0, 0, 255), (x1, y1, rect_width, rect_height), 2) #pygame.display.update(bbox_rect) else: elapsed_ms = time.time() - start_ms ms = "%s %.2fms" % ("No faces detected in", elapsed_ms * 1000) fnt_ms = myfont.render(ms, True, (255, 0, 0)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms, ((mdl_dims / 2) - (fnt_ms_width / 2), 0)) pygame.display.update()
# Object-detection demo main(): the single-label fallback stores `labels`
# as a list (index 0), unlike the other variants in this file which use a
# dict — both support `labels[label_id]` for id 0.
# NOTE(review): `if len(sys.argv[0:])==0` is never true — sys.argv always
# contains the script name — so the help branch is dead code;
# `len(sys.argv) == 1` was probably intended.
# NOTE(review): inside the per-object loop the class label is blitted at
# (x1, y1) BEFORE x1/y1 are recomputed from this object's bbox, so each
# label is drawn at the previous object's position (or at 0,0 initially).
# (Left byte-identical: the chunk is flattened across four physical lines
# with statements split between them; a safe reformat is not possible.)
def main(): parser = argparse.ArgumentParser() parser.add_argument( '--model', help='File path of Tflite model.', required=True) parser.add_argument( '--labels', help='File path of labels file OR leave empty for prompt to type label name', required=False) parser.add_argument( '--dims', help='Model input dimension', required=True) parser.add_argument( '--max_obj', help='Maximum objects detected [>= 1], default=1', default=1, required=False) parser.add_argument( '--thresh', help='Threshold confidence [0.1-1.0], default=0.3', default=0.3, required=False) parser.add_argument( '--video_off', help='Video display off, for increased FPS', action='store_true', required=False) if len(sys.argv[0:])==0: parser.print_help() #parser.print_usage() # for just the usage line parser.exit() args = parser.parse_args() if args.labels: with open(args.labels, 'r') as f: pairs = (l.strip().split(maxsplit=1) for l in f.readlines()) labels = dict((int(k), v) for k, v in pairs) else: lbl_input = None lbl_input = input("Type label name for this single object model:") if lbl_input == None: labels = ["Object"] else: labels = [lbl_input] mdl_dims = int(args.dims) if args.max_obj: max_obj = int(args.max_obj) if max_obj < 1: max_obj = 1 if args.thresh: thresh = float(args.thresh) if thresh < 0.1 or thresh > 1.0: thresh = 0.3 #else: # thresh = 0.3 video_off = False if args.video_off : video_off = True cam_res_x = 256 cam_res_y = 256 max_fps = 30 engine = edgetpu.detection.engine.DetectionEngine(args.model) pygame.init() pygame.camera.init() #screen = pygame.display.set_mode((mdl_dims, mdl_dims), pygame.DOUBLEBUF|pygame.HWSURFACE) screen = pygame.display.set_mode((mdl_dims,mdl_dims), pygame.RESIZABLE) pygame.display.set_caption('Object Detection') pycam = pygame.camera.Camera("/dev/video0",(cam_res_x,cam_res_y)) #, "RGB") pycam.start() #screen.convert() clock = pygame.time.Clock() ##camera = picamera.PiCamera() ##camera.resolution = (mdl_dims, mdl_dims) ##camera.framerate = max_fps 
pygame.font.init() fnt_sz = 18 fnt = pygame.font.SysFont('Arial', fnt_sz) x1=x2=y1=y2=0 #x1, x2, x3, x4, x5 = 0, 50, 50, 0, 0 #y1, y2, y3, y4, y5 = 50, 50, 0, 0, 50 #z = 5 last_tm = time.time() start_ms = time.time() elapsed_ms = time.time() i = 0 results = None fps = "00.0 fps" N = 10 ##rgb = bytearray(camera.resolution[0] * camera.resolution[1] * 3) #rgb = bytearray(320 * 320 * 3) #rawCapture = PiRGBArray(camera, size=camera.resolution) #stream = camera.capture_continuous(rawCapture, format="rgb", use_video_port=True) while True: #clock.tick(max_fps) #with picamera.array.PiRGBArray(camera, size=(mdl_dims, mdl_dims)) as stream: #for foo in camera.capture_continuous(stream, use_video_port=True, format='rgb'): #for f in stream: #start_ms = time.time() #frame = io.BytesIO(f.array) #frame_buf_val = np.frombuffer(frame.getvalue(), dtype=np.uint8) #results = engine.DetectWithInputTensor(frame_buf_val, top_k=10) #rawCapture.truncate(0) #elapsed_ms = time.time() - start_ms ##stream = io.BytesIO() ##camera.capture(stream, use_video_port=True, format='rgb') ##stream.seek(0) ##stream.readinto(rgb) ##frame_val = np.frombuffer(stream.getvalue(), dtype=np.uint8) ##start_ms = time.time() ##results = engine.DetectWithInputTensor(frame_val, top_k=max_obj) ##elapsed_ms = time.time() - start_ms ##img = pygame.image.frombuffer(rgb[0: ##(camera.resolution[0] * camera.resolution[1] * 3)], ##camera.resolution, 'RGB') img = pycam.get_image() img = pygame.transform.scale(img,(mdl_dims,mdl_dims)) img_arr = pygame.surfarray.pixels3d(img) img_arr = np.swapaxes(img_arr,0,1) #img_arr = pygame.PixelArray.transpose(img_arr) #requires pygame.PixelArray object img_arr = np.ascontiguousarray(img_arr) frame = io.BytesIO(img_arr) frame_buf_val = np.frombuffer(frame.getvalue(), dtype=np.uint8) #print(frame_buf_val) start_ms = time.time() results = engine.DetectWithInputTensor(frame_buf_val, threshold=thresh, top_k=max_obj) #frame.truncate(0) elapsed_ms = time.time() - start_ms screen = 
pygame.display.get_surface() #get the surface of the current active display resized_x,resized_y = size = screen.get_width(), screen.get_height() #print("x:", resized_x, " y:", resized_y) img = pygame.transform.scale(img,(resized_x, resized_y)) if img and video_off == False: screen.blit(img, (0,0)) #pygame.surfarray.blit_array(screen, img_arr) i += 1 if i > N: tm = time.time() fps = "fps:{:5.1f} ".format(i / (tm - last_tm)) i = 0 last_tm = tm fps_thresh = fps + " thresh:" + str(thresh) fps_fnt = fnt.render(fps_thresh, True, (255,255,0)) fps_width = fps_fnt.get_rect().width screen.blit(fps_fnt,((resized_x / 2) - (fps_width / 2), 20)) if results: num_obj = 0 for obj in results: num_obj = num_obj + 1 for obj in results: bbox = obj.bounding_box.flatten().tolist() label_id = int(round(obj.label_id,1)) class_label = "%s" % (labels[label_id]) fnt_class_label = fnt.render(class_label, True, (255,255,255)) fnt_class_label_width = fnt_class_label.get_rect().width screen.blit(fnt_class_label,(x1, y1-fnt_sz)) score = round(obj.score,2) x1 = round(bbox[0] * resized_x) y1 = round(bbox[1] * resized_y) x2 = round(bbox[2] * resized_x) y2 = round(bbox[3] * resized_y) rect_width = x2 - x1 rect_height = y2 - y1 class_score = "%.2f" % (score) fnt_class_score = fnt.render(class_score, True, (0,255,255)) fnt_class_score_width = fnt_class_score.get_rect().width screen.blit(fnt_class_score,(x2-fnt_class_score_width, y1-fnt_sz)) ms = "(%d%s%d) %s%.2fms" % (num_obj, "/", max_obj, "objects detected in ", elapsed_ms*1000) fnt_ms = fnt.render(ms, True, (255,255,255)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms,((resized_x / 2 ) - (fnt_ms_width / 2), 0)) bbox_rect = pygame.draw.rect(screen, (0,255,0), (x1, y1, rect_width, rect_height), 4) else: ms = "%s %.2fms" % ("No objects detected in", elapsed_ms*1000) fnt_ms = fnt.render(ms, True, (255,0,0)) fnt_ms_width = fnt_ms.get_rect().width screen.blit(fnt_ms,((resized_x / 2 ) - (fnt_ms_width / 2), 0)) for event in pygame.event.get(): 
keys = pygame.key.get_pressed() if(keys[pygame.K_ESCAPE] == 1): pycam.stop() #pygame.quit() ##camera.close() pygame.display.quit() sys.exit() elif event.type == pygame.VIDEORESIZE: screen = pygame.display.set_mode((event.w,event.h),pygame.RESIZABLE) #pygame.display.flip() pygame.display.update()