def detect_objects(threshold=0.1, top_count=3):
    interpreter = common.make_interpreter(default_model)
    interpreter.allocate_tensors()
    labels = load_labels(default_labels)
    cap = cv2.VideoCapture(default_camera_idx)
    if cap.isOpened():
        for i in range(0, 15):
            ret, frame = cap.read()
            time.sleep(1 / 1000)
            if not ret:
                break
            cv2_im = frame
            cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_rgb)
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter, score_threshold=threshold, top_k=top_count)
            cv2_im = append_objs_to_img(cv2_im, objs, labels)
            cv2.imshow('detect', cv2_im)
            cv2.waitKey(50)

    def make(obj):
        return Result(
            percent=int(100 * obj.score),
            label=labels.get(obj.id, 'unknown'))

    cap.release()
    return [make(obj) for obj in objs]
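# The snippets in this file lean on helpers (Result, load_labels, get_output,
# append_objs_to_img) that are defined elsewhere in the surrounding projects.
# Below is a minimal sketch of two of them, assuming the "<id> <name>" label-file
# format that the other snippets parse by hand; the exact originals may differ.

import collections

# Hypothetical container matching the fields used by detect_objects()/classify().
Result = collections.namedtuple('Result', ['label', 'percent'])


def load_labels(path):
    """Parse a label file of '<id> <name>' lines into an {id: name} dict (sketch)."""
    labels = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            pair = line.strip().split(maxsplit=1)
            if len(pair) == 2 and pair[0].isdigit():
                labels[int(pair[0])] = pair[1].strip()
    return labels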
def __init__(self):
    print('Loading {} with {} labels.'.format(model, labels))
    self.interpreter = common.make_interpreter(model)
    self.interpreter.allocate_tensors()
    self.labels = load_labels(labels)
    self.video = cv2.VideoCapture(0)
    self.file = open("/home/mendel/person_detected.txt", "w")
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', help='File path of .tflite model.',
                        default='inception_v4_299_quant_edgetpu.tflite')
    parser.add_argument('--labels', help='File path of labels file.',
                        default='imagenet_labels.txt')
    parser.add_argument('--top_k', help='Number of classifications to list',
                        type=int, default=1)
    args = parser.parse_args()

    print('Initializing TF Lite interpreter...')
    interpreter = common.make_interpreter(os.path.join(default_model_dir, args.model))
    interpreter.allocate_tensors()
    labels = load_labels(os.path.join(default_model_dir, args.labels))

    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        cv2_im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)
        results = classify_image(interpreter, pil_im, args.top_k)
        for label_id, prob in results:
            cv2.putText(frame, labels[label_id], (5, 35),
                        cv2.FONT_HERSHEY_SIMPLEX, .7, (0, 0, 0), 2)
            print('%s: %.5f' % (labels[label_id], prob))
        cv2.imshow('Classification', frame)
        if cv2.waitKey(1) == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def detect_objects(args):
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    dirname = args.images
    dirpath = Path('results/' + dirname)
    if dirpath.exists() and dirpath.is_dir():
        shutil.rmtree(dirpath)
    Path("results/" + dirname).mkdir(parents=True, exist_ok=True)

    for filename in glob.glob(dirname + "/*.jpeg"):
        print(filename)
        name = os.path.basename(filename)
        pil_im = Image.open(filename)
        open_cv_image = np.array(pil_im)
        snapshot_im = pil_im

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        #print(objs)
        open_cv_image = append_objs_to_img(open_cv_image, objs, labels)

        cv2_im_rgb = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2RGB)
        (flag, encodedImage) = cv2.imencode(".jpeg", cv2_im_rgb)
        #print(flag)
        #print(encodedImage)
        f = open("./results/" + dirname + "/" + name, "wb")
        f.write(encodedImage)
        f.close()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(args.camera_idx)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        #cv2.imshow('frame', cv2_im)
        #cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        #pil_im = Image.fromarray(cv2_im_rgb)
        #handle_image_conversion(pil_im)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
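# append_objs_to_img() is called throughout but defined elsewhere. A minimal
# sketch of the three-argument form it is assumed to take, based on the drawing
# code in the bird-tracking snippet further down (relative bboxes scaled to the
# frame size, then cv2.rectangle/cv2.putText with a "<percent>% <label>" caption):

import cv2


def append_objs_to_img(cv2_im, objs, labels):
    """Draw detection boxes and labels onto a BGR frame (sketch)."""
    height, width, _ = cv2_im.shape
    for obj in objs:
        x0, y0, x1, y1 = list(obj.bbox)
        # Bounding boxes are assumed to be in relative [0, 1] coordinates.
        x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(x1 * width), int(y1 * height)
        percent = int(100 * obj.score)
        label = '{}% {}'.format(percent, labels.get(obj.id, obj.id))
        cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2_im = cv2.putText(cv2_im, label, (x0, y0 + 30),
                             cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)
    return cv2_im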
def main(): default_model_dir = "../coral/models" default_label_dir = "../coral/labels" default_model = "mobilenet_v2_1.0_224_quant_edgetpu.tflite" default_labels = "imagenet_labels.txt" parser = argparse.ArgumentParser() parser.add_argument( "--model", help=".tflite model path", default=os.path.join(default_model_dir, default_model), ) parser.add_argument( "--labels", help="label file path", default=os.path.join(default_label_dir, default_labels), ) args = parser.parse_args() with open(args.labels, "r") as f: pairs = (l.strip().split(maxsplit=1) for l in f.readlines()) labels = dict((int(k), v) for k, v in pairs) interpreter = common.make_interpreter(args.model) interpreter.allocate_tensors() with picamera.PiCamera() as camera: camera.resolution = (640, 480) camera.framerate = 30 camera.annotate_text_size = 20 width, height, channels = common.input_image_size(interpreter) camera.start_preview() try: stream = io.BytesIO() fps = deque(maxlen=20) fps.append(time.time()) for foo in camera.capture_continuous(stream, format="rgb", use_video_port=True, resize=(width, height)): stream.truncate() stream.seek(0) input = np.frombuffer(stream.getvalue(), dtype=np.uint8) start_ms = time.time() common.input_tensor(interpreter)[:, :] = np.reshape( input, common.input_image_size(interpreter)) interpreter.invoke() results = get_output(interpreter, top_k=3, score_threshold=0) inference_ms = (time.time() - start_ms) * 1000.0 fps.append(time.time()) fps_ms = len(fps) / (fps[-1] - fps[0]) camera.annotate_text = "Inference: {:5.2f}ms FPS: {:3.1f}".format( inference_ms, fps_ms) for result in results: camera.annotate_text += "\n{:.0f}% {}".format( 100 * result[1], labels[result[0]]) print(camera.annotate_text) sleep(2) finally: camera.stop_preview()
def main():
    default_model_dir = 'model'
    default_model = 'ssdlite_mobiledet_quant_postprocess_edgetpu.tflite'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='classifier score threshold')
    parser.add_argument('--videosrc', help='Which video source to use. ',
                        default='/dev/video0')
    parser.add_argument('--videofmt', help='Input video format.',
                        default='raw', choices=['raw', 'h264', 'jpeg'])
    args = parser.parse_args()

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()

    w, h, _ = common.input_image_size(interpreter)
    inference_size = (w, h)
    # Average fps over last 100 frames.
    fps_counter = common.avg_fps_counter(100)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(interpreter, input_tensor)
        interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        objs = get_output(interpreter, args.threshold, args.top_k)
        end_time = time.monotonic()
        text_lines = [
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs, text_lines)

    result = gstreamer.run_pipeline(user_callback,
                                    src_size=(640, 480),
                                    appsink_size=inference_size,
                                    videosrc=args.videosrc,
                                    videofmt=args.videofmt)
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    default_labels = 'imagenet_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--videosrc', help='Which video source to use. ',
                        default='/dev/video0')
    parser.add_argument('--videofmt', help='Input video format.',
                        default='raw', choices=['raw', 'h264', 'jpeg'])
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    w, h, _ = common.input_image_size(interpreter)
    inference_size = (w, h)
    # Average fps over last 30 frames.
    fps_counter = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(interpreter, input_tensor)
        interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        results = get_output(interpreter, args.top_k, args.threshold)
        end_time = time.monotonic()
        text_lines = [
            ' ',
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        for result in results:
            text_lines.append('score={:.2f}: {}'.format(
                result.score, labels.get(result.id, result.id)))
        print(' '.join(text_lines))
        return generate_svg(src_size, text_lines)

    result = gstreamer.run_pipeline(user_callback,
                                    src_size=(640, 480),
                                    appsink_size=inference_size,
                                    videosrc=args.videosrc,
                                    videofmt=args.videofmt)
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=str,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # imagezmq receiver
    image_hub = imagezmq.ImageHub(open_port='tcp://147.47.200.65:35556',
                                  REQ_REP=False)  # REQ_REP=False: use PUB/SUB (non-block)

    #cap = cv2.VideoCapture(args.camera_idx)
    while True:
        # receive from zmq
        timestamp, frame = image_hub.recv_image()
        dt = datetime.fromtimestamp(timestamp)
        #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        start = time.monotonic()
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        inference_time = time.monotonic() - start
        inference_time = 'Inference time: %.2f ms (%.2f fps)' % (
            inference_time * 1000, 1.0 / inference_time)

        cv2_im = append_objs_to_img(cv2_im, objs, labels, inference_time, dt)
        #cv2_im = cv2.resize(cv2_im, (720, 720))

        cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
        cv2.imshow("frame", cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    default_labels = 'imagenet_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()

    pygame.init()
    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    print('By default using camera: ', camlist[-1])
    camera = pygame.camera.Camera(camlist[-1], (640, 480))
    width, height, channels = common.input_image_size(interpreter)
    camera.start()
    try:
        fps = deque(maxlen=20)
        fps.append(time.time())
        while True:
            imagen = camera.get_image()
            imagen = pygame.transform.scale(imagen, (width, height))
            input = np.frombuffer(imagen.get_buffer(), dtype=np.uint8)
            start_ms = time.time()
            common.input_tensor(interpreter)[:, :] = np.reshape(
                input, (common.input_image_size(interpreter)))
            interpreter.invoke()
            results = get_output(interpreter, top_k=3, score_threshold=0)
            inference_ms = (time.time() - start_ms) * 1000.0
            fps.append(time.time())
            fps_ms = len(fps) / (fps[-1] - fps[0])
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(
                inference_ms, fps_ms)
            for result in results:
                annotate_text += '\n{:.0f}% {}'.format(100 * result[1], labels[result[0]])
            print(annotate_text)
    finally:
        camera.stop()
def detect_object(args):
    global outputFrame, lock

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    if args.videosrc == 'dev':
        cap = cv2.VideoCapture(args.camera_idx)
    else:
        if args.netsrc == None:
            print("--videosrc was set to net but --netsrc was not specified")
            sys.exit()
        cap = cv2.VideoCapture(args.netsrc)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        snapshot_im = pil_im

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        if args.displayBool == 'True':
            cv2.imshow('frame', cv2_im)

        # acquire the lock, set the output frame, and release the lock
        with lock:
            outputFrame = cv2_im.copy()

        if (time.time() - last_save) >= 1:
            take_snapshot(snapshot_im, objs, labels,
                          exclude=args.exclude.split(','),
                          include=args.include.split(','))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def serve():
    default_model_dir = '/media/mendel/detection-server/models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--camera_idx', type=int,
                        help='Index of which video source to use. ', default=1)
    parser.add_argument('--threshold', type=float,
                        help='Detector threshold. ', default=0.7)
    parser.add_argument('--display', dest='display', action='store_true',
                        help='Display object data. ')
    parser.set_defaults(display=False)
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(os.path.join(default_model_dir, args.model))
    interpreter.allocate_tensors()
    labels = common.load_labels(os.path.join(default_model_dir, args.labels))

    # Get native camera resolution.
    cap = cv2.VideoCapture(args.camera_idx)
    camera_res = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Start a thread to detect objects in camera frames.
        future = executor.submit(start_detector, args.camera_idx, interpreter,
                                 args.threshold, labels, camera_res, args.display)

        # Start other threads for the gRPC server.
        server = grpc.server(executor)
        detection_server_pb2_grpc.add_DetectionServerServicer_to_server(
            DetectionServerServicer(camera_res), server)
        server.add_insecure_port('[::]:50051')
        server.start()

        # Show the value returned by the executor.submit call.
        # This will wait forever unless a runtime error is encountered.
        future.result()

        server.stop(None)
def __init__(self, configuration):
    self.log_directory = configuration['log_directory']
    self.model_directory = configuration['model_directory']
    self.model = os.path.join(self.model_directory, configuration['model'])
    self.labels_file = os.path.join(self.model_directory, configuration['labels'])
    self.top_k = int(configuration['top_k'])
    self.camera_id = int(configuration['camera_id'])
    self.score_threshold = float(configuration['score_threshold'])
    self.clip_duration_sec = int(configuration['clip_duration_sec'])
    self.expire_time = self.clip_duration_sec
    self.video_directory = configuration['video_directory']
    self.camera_stream_url = configuration['camera_stream_url']

    print('Loading {} with {} labels.'.format(self.model, self.labels_file))
    self.interpreter = common.make_interpreter(self.model)
    self.interpreter.allocate_tensors()
    self.stream = cv2.VideoCapture(self.camera_stream_url)
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    w, h, _ = common.input_image_size(interpreter)
    inference_size = (w, h)
    # Average fps over last 30 frames.
    fps_counter = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(interpreter, input_tensor)
        interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        objs = get_output(interpreter, args.threshold, args.top_k)
        end_time = time.monotonic()
        text_lines = [
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)

    result = gstreamer.run_pipeline(user_callback, appsink_size=inference_size)
def classify(model_type=ModelType.General, top_k=1):
    interpreter = common.make_interpreter(model_type.model_path())
    interpreter.allocate_tensors()
    labels = load_labels(model_type.label_path())

    cap = cv2.VideoCapture(0)
    if cap.isOpened():
        for i in range(0, 15):
            ret, frame = cap.read()
            time.sleep(1 / 1000)
            if not ret:
                break
            cv2_im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_rgb)
            common.set_input(interpreter, pil_im)
            results = classify_image(interpreter, pil_im, top_k)
            for label_id, prob in results:
                cv2.putText(cv2_im_rgb, labels[label_id], (5, 35),
                            cv2.FONT_HERSHEY_SIMPLEX, .7, (0, 0, 0), 2)
                print('%s: %.5f' % (labels[label_id], prob))
            cv2.imshow('Classification', cv2_im_rgb)
            cv2.waitKey(50)

    def make(obj):
        fs = "{0}({1})"
        parsed = parse.parse(fs, labels[obj[0]])
        if parsed != None and len(parsed.fixed) > 1:
            tLabel = parsed[1]
        else:
            tLabel = labels[obj[0]]
        return Result(
            label=tLabel,
            percent=int(100 * obj[1]))

    cap.release()
    return [make(obj) for obj in results]
def main():
    cam_w, cam_h = 640, 480
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=5,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='classifier score threshold')
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    pygame.init()
    pygame.font.init()
    font = pygame.font.SysFont('Arial', 20)
    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    w, h, _ = common.input_image_size(interpreter)

    print('By default using camera: ', camlist[-1])
    camera = pygame.camera.Camera(camlist[-1], (cam_w, cam_h))
    try:
        display = pygame.display.set_mode((cam_w, cam_h), 0)
    except pygame.error as e:
        sys.stderr.write("\nERROR: Unable to open a display window. Make sure a monitor is attached and that "
                         "the DISPLAY environment variable is set. Example: \n"
                         ">export DISPLAY=\":0\" \n")
        raise e

    red = pygame.Color(255, 0, 0)

    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            mysurface = camera.get_image()
            imagen = pygame.transform.scale(mysurface, (w, h))
            input = np.frombuffer(imagen.get_buffer(), dtype=np.uint8)
            start_time = time.monotonic()
            common.input_tensor(interpreter)[:, :] = np.reshape(
                input, (common.input_image_size(interpreter)))
            interpreter.invoke()
            results = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
            stop_time = time.monotonic()
            inference_ms = (stop_time - start_time) * 1000.0
            fps_ms = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(inference_ms, fps_ms)
            for result in results:
                x0, y0, x1, y1 = list(result.bbox)
                rect = pygame.Rect(x0 * cam_w, y0 * cam_h,
                                   (x1 - x0) * cam_w, (y1 - y0) * cam_h)
                pygame.draw.rect(mysurface, red, rect, 1)
                label = '{:.0f}% {}'.format(100 * result.score, labels.get(result.id, result.id))
                text = font.render(label, True, red)
                print(label, ' ', end='')
                mysurface.blit(text, (x0 * cam_w, y0 * cam_h))
            text = font.render(annotate_text, True, red)
            print(annotate_text)
            mysurface.blit(text, (0, 0))
            display.blit(mysurface, (0, 0))
            pygame.display.flip()
    finally:
        camera.stop()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--camera_idx', type=str,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument(
        '--model', type=str, help='Pose model to use. ',
        default='models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
    parser.add_argument('--pose3d', type=str, help='3D Pose model to use. ',
                        default='models/3dpose_gan_edgetpu.tflite')
    parser.add_argument('--dataset', type=str, help='Type of dataset. ',
                        default="CORAL")
    parser.add_argument('--rot', type=int,
                        help='Number of degree to rotate in 3D pose. ', default=90)
    args = parser.parse_args()

    engine = PoseEngine(args.model)
    _, image_height, image_width, _ = engine.get_input_tensor_shape()

    interpreter_3dpose = None
    if len(args.pose3d) > 0:
        interpreter_3dpose = common.make_interpreter(args.pose3d)
        interpreter_3dpose.allocate_tensors()
    print("Load all models done!")

    cap = cv2.VideoCapture(args.camera_idx)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(cv2_im_rgb)
        # PIL's resize() returns a new image, so keep the result.
        pil_image = pil_image.resize((image_width, image_height), Image.NEAREST)

        start_time = time.monotonic()
        poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
        print('2D Pose Inference time: %.fms (%.2f fps)' % (
            inference_time, 1000 / inference_time))

        cv2_im = draw_skel_and_kp(cv2_im, poses, args.rot, interpreter_3dpose)
        end_time = time.monotonic()
        process_time = 1000 * (end_time - start_time)
        print('3D Pose End-to-End Inference time: %.fms (%.2f fps)' % (
            process_time, 1000 / process_time))

        cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_labels = 'fer_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=1,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--videosrc', help='Which video source to use. ',
                        default='/dev/video0')
    parser.add_argument('--videofmt', help='Input video format.',
                        default='raw', choices=['raw', 'h264', 'jpeg'])
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    face_interpreter = common.make_interpreter(
        os.path.join(default_model_dir, default_model))
    face_interpreter.allocate_tensors()

    # fer interpreter
    fer_interpreter = common.make_interpreter(args.model)
    fer_interpreter.allocate_tensors()

    labels = load_labels(args.labels)

    w, h, _ = common.input_image_size(face_interpreter)
    inference_size = (w, h)
    # Average fps over last 30 frames.
    fps_counter = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(face_interpreter, input_tensor)
        face_interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        objs = get_output(face_interpreter, args.threshold, args.top_k)

        # Get face detected part
        from PIL import Image
        im = Image.fromarray(common.input_tensor(face_interpreter))

        src_w, src_h = src_size
        inf_w, inf_h = inference_size
        results = []
        emo_objs = []
        for obj in objs:
            x0, y0, x1, y1 = list(obj.bbox)
            # Relative coordinates.
            x, y, w, h = x0, y0, x1 - x0, y1 - y0
            # Absolute coordinates, input tensor space.
            x, y, w, h = int(x * inf_w), int(y * inf_h), int(w * inf_w), int(h * inf_h)
            crop_rectangle = (x, y, x + w, y + h)
            # get face
            face = im.crop(crop_rectangle)
            face = np.array(face)
            # convert to grayscale
            #face = cv2.cvtColor(face, cv2.COLOR_RGB2GRAY)
            print(face.shape)
            face = cv2.resize(face, (224, 224))
            face = face.astype(np.uint8)
            #face /= float(face.max())
            face = np.reshape(face.flatten(), (224, 224, 3))

            # invoke fer interpreter
            common.set_input2(fer_interpreter, face)
            fer_interpreter.invoke()

            # process results
            results = get_emotion(fer_interpreter)
            if len(results) > 0:
                setattr(obj, "id", results[0].id)
                setattr(obj, "score", results[0].score)
                emo_objs.append(obj)
        objs = emo_objs

        end_time = time.monotonic()
        text_lines = []
        if len(objs) > 0:
            text_lines = [
                'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
                'FPS: {} fps'.format(round(next(fps_counter))),
            ]
            for result in results:
                text_lines.append('score={:.2f}: {}'.format(
                    result.score, labels.get(result.id, result.id)))
            #print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)

    result = gstreamer.run_pipeline(user_callback,
                                    src_size=(640, 480),
                                    appsink_size=inference_size,
                                    videosrc=args.videosrc,
                                    videofmt=args.videofmt)
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # csv writer
    f = open('face_output.csv', 'w')
    with f:
        fnames = [
            'timestamp', 'idx', 'label', 'width', 'height', 'xmin', 'ymin',
            'xmax', 'ymax', 'score'
        ]
        writer = csv.DictWriter(f, fieldnames=fnames)
        writer.writeheader()

        # read frames
        for image_path in sorted(
                glob.glob('/home/mendel/dataset/Store/frames/Camera01/*.jpg')):
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            #print(image_name)
            pil_im = Image.open(image_path)

            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)

            (width, height) = pil_im.size
            idx = -1
            for obj in objs:
                x0, y0, x1, y1 = list(obj.bbox)
                x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(x1 * width), int(y1 * height)
                score = obj.score
                label = 'face'
                idx += 1
                writer.writerow({
                    'timestamp': image_name,
                    'idx': idx,
                    'label': label,
                    'width': width,
                    'height': height,
                    'xmin': x0,
                    'ymin': y0,
                    'xmax': x1,
                    'ymax': y1,
                    'score': score
                })
    'bitrate': 1000000,
    'source': '/dev/video2:YUY2:640x480:30/1',
    'window': 10,
    'top_k': 3,
    'threshold': 0.1,
    'print': False,
    'camera_idx': 2,
    'labels': os.path.join(default_model_dir, default_labels)
})

# Start up camera
cap = open_available_stream()

# Load up model, labels, and interpreter
print('Loading {} with {} labels.'.format(args.model, args.labels))
interpreter = common.make_interpreter(args.model)
interpreter.allocate_tensors()
labels = load_labels(args.labels)

# Start networktables
NetworkTables.initialize()
sd = NetworkTables.getTable("SmartDashboard2")


def main():
    # Run our flask app
    app.run(host='0.0.0.0', port=5000, use_reloader=False)
    # Cleaning up
    cap.release()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--top_k', type=int, default=5,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='classifier score threshold')
    parser.add_argument('--video_width', type=int,
                        help='Width resolution of the Video Capture', default=1920)
    parser.add_argument('--video_height', type=int,
                        help='Height resolution of the Video Capture', default=1080)
    parser.add_argument(
        '--confirmations', type=int,
        help='Frames detected with one or more person(s) needed before sending out an alert',
        default=30)
    parser.add_argument(
        '--time_period', type=int,
        help='Maximum time for confirmation check (in seconds)', default=10)
    parser.add_argument('--alert_cooldown', type=int,
                        help='Cooldown time between alerts (in seconds)', default=600)
    args = parser.parse_args()

    print('Loading {}.'.format(args.model))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()

    cap = cv2.VideoCapture(args.camera_idx)
    # set VideoCapture resolution
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.video_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.video_height)

    cooldown_until = 0
    confirmations = array.array('d', [0] * args.confirmations)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)

        persons = list(filter(lambda x: x.id == PERSON_COCO_INDEX, objs))
        if persons:
            current_time = time.time()
            confirmations.append(current_time)
            if (confirmations[-1] - confirmations.pop(0) <= args.time_period
                    and confirmations[-1] >= cooldown_until):
                print("alerted at", current_time)
                image_link = upload_image(pil_im)
                send_alert("person detected at {}. \n{}".format(
                    datetime.datetime.now().isoformat(), image_link))
                cooldown_until = current_time + args.alert_cooldown

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(0)

    # Setup for sending frames to the socket
    #cap.set(3, 320);
    #cap.set(4, 240);
    img_counter = 0
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 90]

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        # Send the annotated frame to the server
        result, frame = cv2.imencode('.jpg', cv2_im, encode_param)
        data = pickle.dumps(frame, 0)
        size = len(data)
        clientsocket.sendall(struct.pack(">L", size) + data)
        #connection.sendall(struct.pack(">L", size) + data)
        img_counter += 1

        #cv2.imshow('frame', cv2_im)
        #if cv2.waitKey(1) & 0xFF == ord('q'):
        #    break

    cap.release()
    cv2.destroyAllWindows()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=10,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.3,
                        help='classifier score threshold')
    parser.add_argument('--class_ids', nargs='*', type=int, default=0,
                        help='Array of class id')
    parser.add_argument('--input_files', default='/home/mendel/dataset/*.jpg',
                        help='Input files')
    parser.add_argument('--csv_out', default='detect_output.csv',
                        help='csv output file')
    args = parser.parse_args()

    if args.class_ids == 0:
        args.class_ids = [0]

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # csv writer
    f = open(args.csv_out, 'w')
    with f:
        fnames = [
            'timestamp', 'idx', 'label', 'width', 'height', 'xmin', 'ymin',
            'xmax', 'ymax', 'score'
        ]
        writer = csv.DictWriter(f, fieldnames=fnames)
        writer.writeheader()

        # read frames
        inference_time = []
        for image_path in sorted(glob.glob(args.input_files)):
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            #print(image_name)
            pil_im = Image.open(image_path)

            # inference
            start = time.time()
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter,
                              score_threshold=args.threshold,
                              top_k=args.top_k,
                              class_list=args.class_ids)
            inference_time.append(time.time() - start)

            # return results
            (width, height) = pil_im.size
            idx = -1
            for obj in objs:
                x0, y0, x1, y1 = list(obj.bbox)
                x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(x1 * width), int(y1 * height)
                score = obj.score
                label = labels.get(obj.id, obj.id)
                idx += 1
                writer.writerow({
                    'timestamp': image_name,
                    'idx': idx,
                    'label': label,
                    'width': width,
                    'height': height,
                    'xmin': x0,
                    'ymin': y0,
                    'xmax': x1,
                    'ymax': y1,
                    'score': score
                })

    print("Inference time : {:.3f} ms".format(
        sum(inference_time) * 1000 / len(inference_time)))
    print("Frames per second : {:.2f} fps".format(
        len(inference_time) / sum(inference_time)))
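# Example invocation of the batch-detection entry point above. The script name
# (detect_to_csv.py) and dataset path are placeholders, not from the original
# project; the flags and defaults match the argparse setup above:
#
#   python3 detect_to_csv.py \
#       --model ../all_models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite \
#       --labels ../all_models/coco_labels.txt \
#       --input_files '/home/mendel/dataset/*.jpg' \
#       --class_ids 0 1 2 \
#       --csv_out detect_output.csv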
def main():
    # default_model_dir = '../all_models'
    # default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    # default_labels = 'coco_labels.txt'
    default_model_dir = '../cmpe297_model'

    # default_model = 'ssdlite_6C_SB_10K_mobiledet_screws.tflite'          #5 classes small BB
    # default_model = 'ssdlite_6C_SB_10K_mobiledet_screws_edgetpu.tflite'  #5 classes small BB
    # default_model = 'ssdlite_6C_SB_25K_mobiledet_screws.tflite'          #5 classes small BB
    default_model = 'ssdlite_6C_SB_25K_mobiledet_screws_edgetpu.tflite'    #5 classes small BB
    # default_model = 'ssdlite_6C_BB_10K_mobiledet_screws.tflite'          #5 classes big BB 1K
    # default_model = 'ssdlite_6C_BB_10K_mobiledet_screws_edgetpu.tflite'  #5 classes big BB 1K
    default_labels = 'ssdlite_mobiledet_screws_6c_labels.txt'

    # default_model = 'ssdlite_2C_BB_10K_mobiledet_screws.tflite'          #5 classes big BB 1K
    # default_model = 'ssdlite_2C_BB_10K_mobiledet_screws_edgetpu.tflite'  #5 classes big BB 1K
    # default_labels = 'ssdlite_mobiledet_screws_2c_labels.txt'

    # default_model_dir = '../cmpe297_model'
    # default_model = 'Sergio_v3_ssdlite_mobiledet_dog_vs_cat.tflite'
    # # default_model = 'Sergio_v3_sdlite_mobiledet_dog_vs_cat_edgetpu.tflite'
    # default_labels = 'cat_vs_doc_All.txt'

    # default_model = 'mobilenet_v2_1.0_224_quant_edgetpu_cmpe297.tflite'
    # # default_model = 'mobilenet_v2_1.0_224_quant_cmpe297.tflite'
    # default_labels = 'flower_labels_cmpe297.txt'

    # default_model = 'eager_mobilenet_v2_1.0_224_quant.tflite'            #no edgeTPU
    # default_model = 'eager_mobilenet_v2_1.0_224_quant_edgetpu.tflite'    #eager
    # # default_model = 'eager2_mobilenet_v2_1.0_224_quant.tflite'         #eager
    # default_model = 'eager2_mobilenet_v2_1.0_224_quant_edgetpu.tflite'   #eager
    # default_labels = 'duckylabels.txt'

    # default_model = 'quant_coco-tiny-v3-relu.tflite'
    # default_model = 'quant_coco-tiny-v3-relu_edgetpu.tflite'

    # default_model = 'ssdlite_mobiledet_dog_vs_cat_edgetpu.tflite'
    # default_labels = 'cat_vs_doc.txt'

    # default_model = 'cmpe297_ssdlite_mobiledet_dog.tflite'
    # default_model = 'cmpe297_ssdlite_mobiledet_dog_edgetpu.tflite'
    # default_model = 'cmpe297v2_ssdlite_mobiledet_dog_edgetpu.tflite'
    # default_labels = 'dogs_labels.txt'

    # default_model = 'ssdlite_mobiledet_dog_vs_cat_edgetpuAcha.tflite'
    # default_labels = 'cat_vs_doc_All.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--videosrc', help='Which video source to use. ',
                        default='/dev/video0')
    parser.add_argument('--videofmt', help='Input video format.',
                        default='raw', choices=['raw', 'h264', 'jpeg'])
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    w, h, _ = common.input_image_size(interpreter)
    inference_size = (w, h)
    # Average fps over last 30 frames.
    fps_counter = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(interpreter, input_tensor)
        interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        objs = get_output(interpreter, args.threshold, args.top_k)
        # print(objs[0].bbox)
        end_time = time.monotonic()
        text_lines = [
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)

    result = gstreamer.run_pipeline(user_callback,
                                    src_size=(640, 480),
                                    appsink_size=inference_size,
                                    videosrc=args.videosrc,
                                    videofmt=args.videofmt)
def main():
    default_model_dir = 'all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    multiTracker = cv2.MultiTracker_create()

    # Initialize logging files
    logging.basicConfig(filename='storage/results.log',
                        format='%(asctime)s-%(message)s', level=logging.DEBUG)

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    vs = PiVideoStream(resolution=(2048, 1536), framerate=32).start()
    #cap = cv2.VideoCapture(args.camera_idx)
    cap = vs.stream
    #cap.set(3, 1920)
    #cap.set(4, 1440)
    # 4:3 resolutions
    # 640×480, 800×600, 960×720, 1024×768, 1280×960, 1400×1050,
    # 1440×1080, 1600×1200, 1856×1392, 1920×1440, 2048×1536
    # 5 MP
    #cap.set(3, 2048)
    #cap.set(4, 1536)

    bboxes = []
    colors = []
    visitation = []
    trackers = []
    started_tracking = None
    last_tracked = None
    visitation_id = None
    save_one_with_boxes = False
    recording = False
    out = None
    fps = FPS().start()
    is_stopped = False
    current_fps = 4.0

    while vs is not None:
        try:
            frame = vs.read()
            if frame is not None:
                if fps._numFrames < 500:
                    fps.update()
                else:
                    fps.stop()
                    current_fps = fps.fps()
                    logging.info("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
                    logging.info("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
                    fps = FPS().start()

                success, boxes = multiTracker.update(frame)
                if success:
                    last_tracked = time.time()
                if len(boxes) > 0:
                    logging.info("success {}".format(success))
                    logging.info("boxes {}".format(boxes))

                cv2_im = frame
                #cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
                pil_im = Image.fromarray(cv2_im)

                common.set_input(interpreter, pil_im)
                interpreter.invoke()
                objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)

                height, width, channels = cv2_im.shape
                bird_detected = False
                boxes_to_draw = []
                for obj in objs:
                    x0, y0, x1, y1 = list(obj.bbox)
                    x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(x1 * width), int(y1 * height)
                    percent = int(100 * obj.score)
                    object_label = labels.get(obj.id, obj.id)
                    label = '{}% {}'.format(percent, object_label)
                    hdd = psutil.disk_usage('/')
                    if object_label == 'bird' and percent > 40:
                        bird_detected = True
                        new_bird = True
                        for bbox in boxes:
                            if intersects(bbox, obj.bbox):
                                logging.info("intersected.. same bird")
                                new_bird = False
                        if new_bird and len(bboxes) == 0:
                            logging.info("found a new bird")
                            visitation_id = uuid.uuid4()
                            started_tracking = time.time()
                            recording = True
                            save_one_with_boxes = True
                            bboxes.append(obj.bbox)
                            colors.append((randint(64, 255), randint(64, 255), randint(64, 255)))
                            tracker = cv2.TrackerCSRT_create()
                            trackers.append(tracker)
                            multiTracker.add(tracker, cv2_im, obj.bbox)
                        if hdd.percent < 95:
                            boxed_image_path = "storage/detected/boxed_{}_{}_{}.png".format(
                                time.strftime("%Y-%m-%d_%H-%M-%S"), percent, visitation_id)
                            full_image_path = "storage/detected/full_{}_{}_{}.png".format(
                                time.strftime("%Y-%m-%d_%H-%M-%S"), percent, visitation_id)
                            cv2.imwrite(boxed_image_path, cv2_im[y0:y1, x0:x1])
                            if percent > 95:
                                cv2.imwrite(full_image_path, cv2_im)
                        else:
                            logging.info("Not enough disk space")
                        percent = int(100 * obj.score)
                        object_label = labels.get(obj.id, obj.id)
                        label = '{}% {}'.format(percent, object_label)
                        # postpone drawing so we don't get lines in the photos
                        boxes_to_draw.append({
                            "p1": (x0, y0),
                            "p2": (x1, y1),
                            "label": label,
                            "label_p": (x0, y0 + 30)
                        })

                for box in boxes_to_draw:
                    cv2_im = cv2.rectangle(cv2_im, box["p1"], box["p2"], (0, 255, 0), 2)
                    cv2_im = cv2.putText(cv2_im, box["label"], box["label_p"],
                                         cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)

                if recording == True:
                    if out == None:
                        fourcc = cv2.VideoWriter_fourcc(*'X264')
                        out = cv2.VideoWriter(
                            "storage/video/{}.avi".format(visitation_id), fourcc, 4.0, (2048, 1536))
                    out.write(cv2_im)

                if bird_detected == True and save_one_with_boxes == True:
                    with_boxes_image_path = "storage/with_boxes/full_{}_{}.png".format(
                        time.strftime("%Y-%m-%d_%H-%M-%S"), visitation_id)
                    cv2.imwrite(with_boxes_image_path, cv2_im)
                    save_one_with_boxes = False

                if bird_detected == False and len(trackers) > 0:
                    now = time.time()
                    if now - last_tracked > 60:
                        logging.info("visitation {} lasted {} seconds".format(
                            visitation_id, now - started_tracking))
                        logging.info("clearing trackers")
                        for tracker in trackers:
                            tracker.clear()
                        multiTracker = cv2.MultiTracker_create()
                        boxes = []
                        colors = []
                        trackers = []
                        bboxes = []
                        recording = False
                        out.release()
                        out = None

                for i, newbox in enumerate(boxes):
                    x0, y0, x1, y1 = list(newbox)
                    x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(x1 * width), int(y1 * height)
                    cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), (0, 0, 255), 2)

                cv2.namedWindow('Leroy', cv2.WINDOW_NORMAL)
                cv2.resizeWindow('Leroy', 800, 600)
                cv2.imshow('Leroy', cv2_im)
        except KeyboardInterrupt:
            print('Interrupted')
            try:
                sys.exit(0)
            except SystemExit:
                os._exit(0)
        except:
            logging.exception('Something happened.')

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    out.release()
    vs.stop()
def main(): default_model_dir = "../coral/models" default_label_dir = "../coral/labels" default_model = "ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite" default_labels = "coco_labels.txt" parser = argparse.ArgumentParser() parser.add_argument( "--model", help=".tflite model path", default=os.path.join(default_model_dir, default_model), ) parser.add_argument( "--labels", help="label file path", default=os.path.join(default_label_dir, default_labels), ) parser.add_argument( "--top_k", type=int, default=3, help="number of categories with highest score to display", ) parser.add_argument("--camera_idx", type=int, help="Index of which video source to use. ", default=0) parser.add_argument("--threshold", type=float, default=0.1, help="classifier score threshold") args = parser.parse_args() print("Loading {} with {} labels.".format(args.model, args.labels)) interpreter = common.make_interpreter(args.model) interpreter.allocate_tensors() labels = load_labels(args.labels) cap = cv2.VideoCapture(args.camera_idx) count = 0 while cap.isOpened(): ret, frame = cap.read() count += 1 if not ret: break if count == 20: break cv2_im = frame cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB) pil_im = Image.fromarray(cv2_im_rgb) common.set_input(interpreter, pil_im) interpreter.invoke() objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k) cv2_im = append_objs_to_img(cv2_im, objs, labels) # cv2.imshow("frame", cv2_im) # if cv2.waitKey(1) & 0xFF == ord("q"): # break # Saving the image cv2.imwrite("detectImage.jpg", cv2_im) cap.release() cv2.destroyAllWindows()
def main():
    args = get_arguments()
    MODEL = args.model
    LABELS = args.labels
    TOP_K = args.top_k
    THRESHOLD = args.threshold
    CAPTURE_MODE = args.capture_mode
    INPUT_VIDEO_DIR = args.input_video_dir
    INPUT_VIDEO = str(os.path.join(INPUT_VIDEO_DIR, args.input_video))
    OUTPUT_VIDEO_DIR = args.output_video_dir
    OUTPUT_VIDEO = str(os.path.join(OUTPUT_VIDEO_DIR, args.output_video))
    OUTPUT_VIDEO_RES = args.output_video_res
    OUTPUT_VIDEO_FPS = args.output_video_fps
    OUTPUT_VIDEO_TIME = args.output_video_time
    RECORD_MODE = args.record_mode
    MONITOR = args.monitor
    DEBUG_MODE = args.debug_mode

    # Define the hardware
    ui = get_ui()

    # Create an interpreter engine based on the tflite model path given
    debug_print("Loading {} with {} labels.".format(MODEL, LABELS))
    interpreter = common.make_interpreter(MODEL)
    interpreter.allocate_tensors()

    cap = None
    out = None

    # Define camera or input video properties
    cap = load_cap(CAPTURE_MODE, INPUT_VIDEO)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    # Define output file if recording
    out = load_out(cap, CAPTURE_MODE, OUTPUT_VIDEO, OUTPUT_VIDEO_RES, cap_fps, OUTPUT_VIDEO_FPS)

    labels = load_labels(LABELS)
    frame_count = 0

    # Read each frame of the capture and feed it to the interpreter, which returns an
    # edited frame with each detected object enclosed by a bounding box. Based on the
    # object(s) detected, relay a Bluetooth message to the Arduino that controls the
    # stepper motor and animatronics.
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        objs = []
        cv2_im, objs = detect(frame, interpreter, labels, THRESHOLD, TOP_K)
        display_frame(cv2_im)
        relay_bluetooth_detection(ui, objs, labels, cap_width, cap_height)
        if RECORD_MODE:
            out.write(cv2_im)
        if DEBUG_MODE:
            frame_count += 1
            seconds = (frame_count / cap_fps) % 60
            debug_print(seconds)
        if (cv2.waitKey(1) & 0xFF == ord('q')) or (
                (RECORD_MODE or DEBUG_MODE) and seconds > OUTPUT_VIDEO_TIME):
            debug_print("Ending capture")
            break

    cleanup(cap, out)
    print("Successful termination")
def main():
    default_model_dir = './'  #'../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='classifier score threshold')
    parser.add_argument('--min_angle', type=float, default=0.0,
                        help='minimum angle for sweep')
    parser.add_argument('--max_angle', type=float, default=180.0,
                        help='maximum angle for sweep')
    args = parser.parse_args()

    print('Initializing servo')
    servo, servoData = ServoControls.init(
        0, sweepStepTracking=0.1, minAngle=args.min_angle, maxAngle=args.max_angle)  #SERVO_PIN, 50)
    servoThread = _thread.start_new_thread(control_servo, (servo, servoData))

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # cv2.VideoCapture.set(v_c, cv2.CAP_PROP_FPS, 15)
    cap = cv2.VideoCapture(args.camera_idx)

    # Read first frame to get window frame shape
    _, frame = cap.read()
    if frame is None:
        raise Exception('Image not found!')
    frameH, frameW, frameChannels = frame.shape

    lastTargetLost = None
    lastTargetLostTime = datetime.datetime.now()
    targetState = TargetState.UNKNOWN
    play_sound('searching.mp3')

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        #frame = cv2.flip(frame, flipCode=1)
        frame = imutils.rotate(frame, 90)
        h, w, layers = frame.shape
        aspect_ratio = w / h
        cv2_im = frame  # cv2.resize(frame, (1920, 1080))

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        face = next(filter(lambda a: a.id == 0, objs), None)
        if face != None:
            if targetState == TargetState.UNKNOWN:
                targetState = TargetState.ACQUIRED
            height, width, channels = cv2_im.shape
            x0, y0, x1, y1 = list(face.bbox)
            x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(x1 * width), int(y1 * height)
            lastTargetLost = 0
            servoData['targetCoordinates'] = [
                round(abs((x0 + (x1)) / 2)),
                round(abs((y0 + (y1)) / 2))
            ]
        else:
            # target may have been lost
            if targetState == TargetState.TRACKING:
                targetState = TargetState.LOST
                # track lost time
                lastTargetLostTime = datetime.datetime.now()
            if targetState == TargetState.LOST and (
                    lastTargetLostTime == None or
                    (datetime.datetime.now() - lastTargetLostTime).seconds > 2):
                # if lost for over a second, reset targetState back to default
                servoData['targetCoordinates'] = [-1, -1]
                play_sound('are-still-there.mp3')
                targetState = TargetState.UNKNOWN
                lastTargetLostTime = None

        if targetState == TargetState.ACQUIRED:
            play_sound(TARGET_ACQUIRED)
            targetState = TargetState.TRACKING

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    RUNNING = False
    cap.release()
    cv2.destroyAllWindows()
def main():
    args = get_arguments()
    MODEL = args.model
    LABELS = args.labels
    TOP_K = args.top_k
    THRESHOLD = args.threshold
    CAPTURE_MODE = args.capture_mode
    INPUT_VIDEO_DIR = args.input_video_dir
    INPUT_VIDEO = str(os.path.join(INPUT_VIDEO_DIR, args.input_video))
    OUTPUT_VIDEO_DIR = args.output_video_dir
    OUTPUT_VIDEO = str(os.path.join(OUTPUT_VIDEO_DIR, args.output_video))
    OUTPUT_VIDEO_RES = args.output_video_res
    OUTPUT_VIDEO_FPS = args.output_video_fps
    OUTPUT_VIDEO_TIME = args.output_video_time
    RECORD_MODE = args.record_mode
    MONITOR = args.monitor
    DEBUG_MODE = args.debug_mode

    ui = get_ui()

    debug_print("Loading {} with {} labels.".format(MODEL, LABELS))
    interpreter = common.make_interpreter(MODEL)
    interpreter.allocate_tensors()

    cap = None
    out = None

    cap = load_cap(CAPTURE_MODE, INPUT_VIDEO)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    out = load_out(cap, CAPTURE_MODE, OUTPUT_VIDEO, OUTPUT_VIDEO_RES, cap_fps, OUTPUT_VIDEO_FPS)

    labels = load_labels(LABELS)
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        objs = []
        cv2_im, objs = detect(frame, interpreter, labels, THRESHOLD, TOP_K)
        display_frame(cv2_im)
        relay_bluetooth_detection(ui, objs, cap_width, cap_height)
        if RECORD_MODE:
            out.write(cv2_im)
        frame_count += 1
        seconds = (frame_count / cap_fps) % 60
        debug_print(seconds)
        if (cv2.waitKey(1) & 0xFF == ord('q')) or (RECORD_MODE and seconds > OUTPUT_VIDEO_TIME):
            debug_print("Ending capture")
            break

    cleanup(cap, out)
    print("Successful termination")
def main():
    # The counters below are assumed to live at module level in the original script;
    # declare them global so the in-loop assignments do not raise UnboundLocalError.
    global COUNTER, ALARM_ON, yawnStatus, yawns

    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int,
                        help='Index of which video source to use. ', default=0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(1)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im = imutils.resize(frame, width=640)
        gray = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2GRAY)
        prev_yawn_status = yawnStatus

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        rects = detector(gray, 0)
        for rect in rects:
            shape = predictor(gray, rect)
            shape = face_utils.shape_to_np(shape)
            leftEye = shape[lStart:lEnd]
            rightEye = shape[rStart:rEnd]
            mouth = shape[mStart:mEnd]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            mouEAR = mouth_aspect_ratio(mouth)
            ear = (leftEAR + rightEAR) / 2.0
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            mouthHull = cv2.convexHull(mouth)

            if ear < EYE_AR_THRESH:
                COUNTER += 1
                cv2.putText(cv2_im, "Eyes Closed ", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                if COUNTER >= EYE_AR_CONSEC_FRAMES:
                    cv2.putText(cv2_im, "DROWSINESS ALERT!", (10, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                    if not ALARM_ON:
                        ALARM_ON = True
                        threadStatusQ.put(not ALARM_ON)
                        thread = Thread(target=soundAlert, args=(sound_path, threadStatusQ,))
                        thread.setDaemon(True)
                        thread.start()
                else:
                    ALARM_ON = False
            else:
                COUNTER = 0
                cv2.putText(cv2_im, "Eyes Open ", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            if mouEAR > MOU_AR_THRESH:
                cv2.putText(cv2_im, "Yawning ", (10, 70),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                yawnStatus = True
                output_text = "Yawn Count: " + str(yawns + 1)
                cv2.putText(cv2_im, output_text, (10, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
            else:
                yawnStatus = False
            if prev_yawn_status == True and yawnStatus == False:
                yawns += 1

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()