class DepthAI:
    def __init__(self, file=None, camera=False):
        print("Loading pipeline...")
        self.file = file
        self.camera = camera
        self.create_pipeline()
        self.start_pipeline()
        self.fps = FPS()
        self.fps.start()

    def create_pipeline(self):
        print("Creating pipeline...")
        self.pipeline = depthai.Pipeline()
        if self.camera:
            print("Creating Color Camera...")
            self.cam = self.pipeline.createColorCamera()
            self.cam.setPreviewSize(self._cam_size[0], self._cam_size[1])
            self.cam.setResolution(
                depthai.ColorCameraProperties.SensorResolution.THE_1080_P)
            self.cam.setInterleaved(False)
            self.cam.setBoardSocket(depthai.CameraBoardSocket.RGB)
            cam_xout = self.pipeline.createXLinkOut()
            cam_xout.setStreamName("cam_out")
            self.cam.preview.link(cam_xout.input)
        self.create_nns()

    def create_nns(self):
        # Hook for subclasses to add their neural network nodes.
        pass

    def create_models(self, model_path, model_name, first_model=False):
        print(f"Start creating {model_path} neural network...")
        model_nn = self.pipeline.createNeuralNetwork()
        model_nn.setBlobPath(str(Path(model_path).resolve().absolute()))
        if first_model and self.camera:
            self.cam.preview.link(model_nn.input)
        else:
            model_in = self.pipeline.createXLinkIn()
            model_in.setStreamName(f"{model_name}_in")
            model_in.out.link(model_nn.input)
        model_nn_xout = self.pipeline.createXLinkOut()
        model_nn_xout.setStreamName(f"{model_name}_nn")
        model_nn.out.link(model_nn_xout.input)

    def create_mobilenet_nn(self, model_path, model_name, first_model=False, conf=0.5):
        print(f"Start creating {model_path} neural network...")
        model_nn = self.pipeline.createMobileNetDetectionNetwork()
        model_nn.setBlobPath(str(Path(model_path).resolve().absolute()))
        model_nn.setConfidenceThreshold(conf)
        model_nn.input.setBlocking(False)
        if first_model and self.camera:
            self.cam.preview.link(model_nn.input)
        else:
            model_in = self.pipeline.createXLinkIn()
            model_in.setStreamName(f"{model_name}_in")
            model_in.out.link(model_nn.input)
        model_nn_xout = self.pipeline.createXLinkOut()
        model_nn_xout.setStreamName(f"{model_name}_nn")
        model_nn.out.link(model_nn_xout.input)

    def create_yolo_nn(self, model_path, model_name, first_model=False, conf=0.5):
        # `conf` was referenced here without being a parameter; it now
        # mirrors create_mobilenet_nn, as does the input-linking logic below.
        print(f"Start creating {model_path} neural network...")
        model_nn = self.pipeline.createYoloDetectionNetwork()
        model_nn.setBlobPath(str(Path(model_path).resolve().absolute()))
        model_nn.setConfidenceThreshold(conf)
        model_nn.input.setBlocking(False)
        if first_model and self.camera:
            self.cam.preview.link(model_nn.input)
        else:
            model_in = self.pipeline.createXLinkIn()
            model_in.setStreamName(f"{model_name}_in")
            model_in.out.link(model_nn.input)
        model_nn_xout = self.pipeline.createXLinkOut()
        model_nn_xout.setStreamName(f"{model_name}_nn")
        model_nn.out.link(model_nn_xout.input)

    def start_pipeline(self):
        self.device = depthai.Device(self.pipeline)
        print("Starting pipeline...")
        self.device.startPipeline()
        if self.camera:
            self.cam_out = self.device.getOutputQueue("cam_out", 4, False)
        self.start_nns()

    def start_nns(self):
        # Hook for subclasses to grab their device queues.
        pass

    def full_frame_cords(self, cords):
        original_cords = self.face_coords[0]
        return [
            original_cords[0 if i % 2 == 0 else 1] + val
            for i, val in enumerate(cords)
        ]

    def full_frame_bbox(self, bbox):
        relative_cords = self.full_frame_cords(bbox)
        height, width = self.frame.shape[:2]
        y_min = max(0, relative_cords[1])
        y_max = min(height, relative_cords[3])
        x_min = max(0, relative_cords[0])
        x_max = min(width, relative_cords[2])
        result_frame = self.frame[y_min:y_max, x_min:x_max]
        return result_frame, relative_cords

    def draw_bbox(self, bbox, color):
        cv2.rectangle(self.debug_frame, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]), color, 2)

    def run_video(self):
        cap = cv2.VideoCapture(str(Path(self.file).resolve().absolute()))
        while cap.isOpened():
            read_correctly, self.frame = cap.read()
            if not read_correctly:
                break
            try:
                self.parse()
            except StopIteration:
                break
        cap.release()

    def run_camera(self):
        while True:
            in_rgb = self.cam_out.tryGet()
            if in_rgb is not None:
                shape = (3, in_rgb.getHeight(), in_rgb.getWidth())
                self.frame = in_rgb.getData().reshape(shape).transpose(
                    1, 2, 0).astype(np.uint8)
                self.frame = np.ascontiguousarray(self.frame)
                try:
                    self.parse()
                except StopIteration:
                    break

    def parse(self):
        if debug:
            self.debug_frame = self.frame.copy()
        self.parse_run()
        if debug:
            aspect_ratio = self.frame.shape[1] / self.frame.shape[0]
            cv2.imshow("Camera_view",
                       cv2.resize(self.debug_frame,
                                  (int(900), int(900 / aspect_ratio))))
            if cv2.waitKey(1) == ord('q'):
                cv2.destroyAllWindows()
                raise StopIteration()

    def parse_run(self):
        # Hook for subclasses to run per-frame inference.
        pass

    @property
    def cam_size(self):
        return self._cam_size

    @cam_size.setter
    def cam_size(self, v):
        self._cam_size = v

    @property
    def get_cam_size(self):
        return (self._cam_size[0], self._cam_size[1])

    def run(self):
        if self.file is not None:
            self.run_video()
        else:
            self.run_camera()
        self.fps.stop()
        print("FPS: {:.2f}".format(self.fps.fps()))
        del self.device
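# `DepthAI` above is a template base class: `create_nns`, `start_nns`, and
# `parse_run` are the intended override points. A minimal usage sketch,
# assuming the `frame_norm` helper used elsewhere in this collection; the
# `FaceDemo` name and blob path are hypothetical.
class FaceDemo(DepthAI):
    def __init__(self, file=None, camera=False):
        # The preview size must exist before create_pipeline() runs inside
        # the base __init__, so set it first.
        self.cam_size = (300, 300)
        super().__init__(file, camera)

    def create_nns(self):
        # Called at the end of create_pipeline().
        self.create_mobilenet_nn("models/face-detection-retail-0004.blob",
                                 "face", first_model=True)

    def start_nns(self):
        # Called at the end of start_pipeline().
        self.face_nn = self.device.getOutputQueue("face_nn", 4, False)

    def parse_run(self):
        # Called once per frame from parse().
        in_det = self.face_nn.tryGet()
        if in_det is not None:
            for det in in_det.detections:
                bbox = frame_norm(self.frame,
                                  (det.xmin, det.ymin, det.xmax, det.ymax))
                self.draw_bbox(bbox, (10, 245, 10))


FaceDemo(camera=True).run()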
class Main:
    def __init__(self, file=None, camera=False):
        print("Loading pipeline...")
        self.file = file
        self.camera = camera
        self.pipeline = create_pipeline(self.camera)
        self.start_pipeline()
        self.COUNTER = 0
        self.mCOUNTER = 0
        self.hCOUNTER = 0
        self.TOTAL = 0
        self.mTOTAL = 0
        self.hTOTAL = 0
        self.fps = FPS()
        self.fps.start()

    def start_pipeline(self):
        self.device = depthai.Device(self.pipeline)
        print("Starting pipeline...")
        if self.camera:
            self.cam_out = self.device.getOutputQueue("cam_out", 4, False)
        else:
            self.face_in = self.device.getInputQueue("face_in", 4, False)
        self.face_nn = self.device.getOutputQueue("face_nn", 4, False)
        self.land68_in = self.device.getInputQueue("land68_in", 4, False)
        self.land68_nn = self.device.getOutputQueue("land68_nn", 4, False)

    def draw_bbox(self, bbox, color):
        cv2.rectangle(self.debug_frame, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]), color, 2)

    def run_face(self):
        if self.camera:
            nn_data = self.face_nn.tryGet()
        else:
            nn_data = run_nn(self.face_in, self.face_nn,
                             {"data": to_planar(self.frame, (300, 300))})
        results = to_bbox_result(nn_data)
        self.face_coords = [
            frame_norm(self.frame, *obj[3:7]) for obj in results
            if obj[2] > 0.4
        ]
        if len(self.face_coords) == 0:
            return False
        for face_coord in self.face_coords:
            # Pad each detection by 15 px on every side.
            face_coord[0] -= 15
            face_coord[1] -= 15
            face_coord[2] += 15
            face_coord[3] += 15
        self.face_frame = [
            self.frame[face_coord[1]:face_coord[3],
                       face_coord[0]:face_coord[2]]
            for face_coord in self.face_coords
        ]
        if debug:
            for bbox in self.face_coords:
                self.draw_bbox(bbox, (10, 245, 10))
        return True

    def run_land68(self, face_frame, count):
        try:
            nn_data = run_nn(self.land68_in, self.land68_nn,
                             {"data": to_planar(face_frame, (160, 160))})
            out = to_nn_result(nn_data)
            result = frame_norm(face_frame, *out)
            eye_left = []
            eye_right = []
            mouth = []
            hand_points = []
            for i in range(72, 84, 2):
                eye_left.append((out[i], out[i + 1]))
            for i in range(84, 96, 2):
                eye_right.append((out[i], out[i + 1]))
            for i in range(96, len(result), 2):
                if i in (96, 100, 104, 108, 112, 116):
                    mouth.append(np.array([out[i], out[i + 1]]))
            for i in range(16, 110, 2):
                if i in (16, 60, 72, 90, 96, 108):
                    cv2.circle(self.debug_frame,
                               (result[i] + self.face_coords[count][0],
                                result[i + 1] + self.face_coords[count][1]),
                               2, (255, 0, 0), thickness=1, lineType=8,
                               shift=0)
                    hand_points.append(
                        (result[i] + self.face_coords[count][0],
                         result[i + 1] + self.face_coords[count][1]))
            ret, rotation_vector, translation_vector, camera_matrix, dist_coeffs = \
                get_pose_estimation(self.frame.shape,
                                    np.array(hand_points, dtype='double'))
            ret, pitch, yaw, roll = get_euler_angle(rotation_vector)
            (nose_end_point2D, jacobian) = cv2.projectPoints(
                np.array([(0.0, 0.0, 1000.0)]), rotation_vector,
                translation_vector, camera_matrix, dist_coeffs)
            p1 = (int(hand_points[1][0]), int(hand_points[1][1]))
            p2 = (int(nose_end_point2D[0][0][0]),
                  int(nose_end_point2D[0][0][1]))
            cv2.line(self.debug_frame, p1, p2, (255, 0, 0), 2)
            euler_angle_str = 'Y:{}, X:{}, Z:{}'.format(pitch, yaw, roll)
            cv2.putText(self.debug_frame, euler_angle_str, (10, 20),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, (0, 0, 255))
            if pitch < 0:
                self.hCOUNTER += 1
                if self.hCOUNTER >= 20:
                    cv2.putText(self.debug_frame, "SLEEP!!!",
                                (self.face_coords[count][0],
                                 self.face_coords[count][1] - 10),
                                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 3)
            else:
                if self.hCOUNTER >= 3:
                    self.hTOTAL += 1
                self.hCOUNTER = 0
            left_ear = self.eye_aspect_ratio(eye_left)
            right_ear = self.eye_aspect_ratio(eye_right)
            ear = (left_ear + right_ear) / 2.0
            if ear < 0.15:  # eye aspect ratio threshold: 0.15
                self.COUNTER += 1
                if self.COUNTER >= 20:
                    cv2.putText(self.debug_frame, "SLEEP!!!",
                                (self.face_coords[count][0],
                                 self.face_coords[count][1] - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
            else:
                # Staying below the threshold for at least 3 consecutive
                # frames counts as one blink.
                if self.COUNTER >= 3:  # threshold: 3
                    self.TOTAL += 1
                # Reset the eye frame counter.
                self.COUNTER = 0
            mouth_ratio = self.mouth_aspect_ratio(mouth)
            if mouth_ratio > 0.5:
                self.mCOUNTER += 1
            else:
                if self.mCOUNTER >= 3:
                    self.mTOTAL += 1
                self.mCOUNTER = 0
            cv2.putText(
                self.debug_frame,
                "eye:{:d},mouth:{:d},head:{:d}".format(
                    self.TOTAL, self.mTOTAL, self.hTOTAL),
                (10, 40), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, (255, 0, 0))
            if self.TOTAL >= 50 or self.mTOTAL >= 15 or self.hTOTAL >= 10:
                cv2.putText(self.debug_frame, "Danger!!!", (100, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
        except:
            # Pose estimation can fail on bad crops; skip the frame.
            pass

    def mouth_aspect_ratio(self, mouth):
        A = np.linalg.norm(mouth[1] - mouth[5])  # 51, 59
        B = np.linalg.norm(mouth[2] - mouth[4])  # 53, 57
        C = np.linalg.norm(mouth[0] - mouth[3])  # 49, 55
        mar = (A + B) / (2.0 * C)
        return mar

    def eye_aspect_ratio(self, eye):
        A = euclidean(eye[1], eye[5])
        B = euclidean(eye[2], eye[4])
        C = euclidean(eye[0], eye[3])
        return (A + B) / (2.0 * C)

    def parse(self):
        if debug:
            self.debug_frame = self.frame.copy()
        face_success = self.run_face()
        if face_success:
            for i in range(len(self.face_frame)):
                self.run_land68(self.face_frame[i], i)
        self.fps.update()
        if debug:
            aspect_ratio = self.frame.shape[1] / self.frame.shape[0]
            cv2.imshow(
                "Camera_view",
                cv2.resize(self.debug_frame,
                           (int(900), int(900 / aspect_ratio))))
            if cv2.waitKey(1) == ord('q'):
                cv2.destroyAllWindows()
                raise StopIteration()

    def run_video(self):
        cap = cv2.VideoCapture(str(Path(self.file).resolve().absolute()))
        while cap.isOpened():
            read_correctly, self.frame = cap.read()
            if not read_correctly:
                break
            try:
                self.parse()
            except StopIteration:
                break
        cap.release()

    def run_camera(self):
        while True:
            rgb_in = self.cam_out.get()
            self.frame = np.array(rgb_in.getData()).reshape(
                (3, rgb_in.getHeight(),
                 rgb_in.getWidth())).transpose(1, 2, 0).astype(np.uint8)
            try:
                self.parse()
            except StopIteration:
                break

    def run(self):
        if self.file is not None:
            self.run_video()
        else:
            self.run_camera()
        self.fps.stop()
        print("FPS: {:.2f}".format(self.fps.fps()))
        del self.device
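# `eye_aspect_ratio` above is the standard EAR measure (Soukupová & Čech,
# 2016): EAR = (||p2 - p6|| + ||p3 - p5||) / (2 * ||p1 - p4||). A quick
# sanity check with made-up landmark coordinates:
from scipy.spatial.distance import euclidean

# Hypothetical landmarks p1..p6 for an open eye, in (x, y) pixels:
# corners 30 px apart, lids 8 px apart.
open_eye = [(0, 0), (10, -4), (20, -4), (30, 0), (20, 4), (10, 4)]
A = euclidean(open_eye[1], open_eye[5])  # ||p2 - p6|| = 8.0
B = euclidean(open_eye[2], open_eye[4])  # ||p3 - p5|| = 8.0
C = euclidean(open_eye[0], open_eye[3])  # ||p1 - p4|| = 30.0
print((A + B) / (2.0 * C))  # ~0.27 for an open eye; closing the lids drives
                            # A and B toward 0, pushing EAR below the 0.15
                            # threshold used above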
else:
    detection_in = device.getInputQueue("detection_in")
    detection_nn = device.getOutputQueue("detection_nn")
    age_gender_in = device.getInputQueue("age_gender_in")
    age_gender_nn = device.getOutputQueue("age_gender_nn")

detections = []
results = []
face_bbox_q = queue.Queue()
next_id = 0

if args.video:
    cap = cv2.VideoCapture(str(Path(args.video).resolve().absolute()))

fps = FPS()
fps.start()


def should_run():
    return cap.isOpened() if args.video else True


def get_frame():
    if args.video:
        return cap.read()
    else:
        return True, np.array(cam_out.get().getData()).reshape(
            (3, 300, 300)).transpose(1, 2, 0).astype(np.uint8)


try:
    while should_run():
        read_correctly, frame = get_frame()
class Vision:
    def __init__(self,
                 model_xml,
                 model_bin,
                 robot_controller,
                 is_headless=True,
                 live_stream=True,
                 confidence_interval=0.5,
                 draw_alignment_info=True,
                 save_video=True):
        """
        Vision class constructor.
        :param model_xml: Network topology
        :param model_bin: Network weights
        :param is_headless: Headless mode flag; if set to True, frames will not be displayed
        :param live_stream: Live streaming flag; if set to True, frames will be sent through the websocket
        :param confidence_interval: Confidence interval for predictions. Only predictions above this value will be processed
        """
        # log.basicConfig(format="[ %(asctime)s ] [ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
        log.info("Instantiating Vision class...")

        # Websocket endpoint for live streaming
        ws_endpoint = "wss://api.growbot.tardis.ed.ac.uk/stream-video/35ae6830-d961-4a9c-937f-8aa5bc61d6a3"

        self.is_headless = is_headless
        self.confidence_interval = confidence_interval
        self.live_stream = live_stream
        self.robot_controller = robot_controller
        self.draw_alignment_info = draw_alignment_info
        self.save_video = save_video
        self.frame_counter = 0

        # Initialize plugin
        log.info("Initializing plugin for MYRIAD X VPU...")
        self.plugin = IEPlugin(device='MYRIAD')

        # Initialize network
        log.info("Reading Intermediate Representation...")
        self.net = IENetwork(model=model_xml, weights=model_bin)

        # Initialize IO blobs
        self.input_blob = next(iter(self.net.inputs))
        self.out_blob = next(iter(self.net.outputs))

        # Load network into IE plugin
        log.info("Loading Intermediate Representation to the plugin...")
        self.exec_net = self.plugin.load(network=self.net, num_requests=2)

        # Extract network's input layer information
        self.n, self.c, self.h, self.w = self.net.inputs[self.input_blob].shape

        # Initialize VideoCapture and let it warm up
        self.cap = cv2.VideoCapture(0)
        time.sleep(1)

        # Initialize FPS counter
        self.fps = FPS()

        # Get capture dimensions
        self.initial_w = self.cap.get(3)
        self.initial_h = self.cap.get(4)

        # Used to record OpenCV rendering time
        self.render_time = 0

        # Initialize websocket
        if self.live_stream:
            log.info("Connecting to websocket...")
            self.ws = create_connection(ws_endpoint)

    def start(self):
        """
        Starts video capture and performs inference using the MYRIAD X VPU.
        :return:
        """
        asyncio.set_event_loop(asyncio.new_event_loop())
        self.fps.start()
        log.info("Starting video stream. Press ESC to stop.")
        ret, frame = self.cap.read()

        # Async request identifiers
        cur_request_id = 0
        next_request_id = 1

        while self.cap.isOpened():
            try:
                self.fps.update()

                # Read next frame
                ret, next_frame = self.cap.read()

                # Break if failed to read
                if not ret:
                    break

                # Main synchronization point. Start the next inference request,
                # while waiting for the current one to complete.
                inf_start = time.time()

                # Resize, change layout, reshape to fit network input size
                # and start asynchronous inference
                in_frame = cv2.resize(next_frame, (self.w, self.h))
                in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((self.n, self.c, self.h, self.w))
                self.exec_net.start_async(request_id=next_request_id,
                                          inputs={self.input_blob: in_frame})

                if self.exec_net.requests[cur_request_id].wait(-1) == 0:
                    # Capture inference time
                    inf_end = time.time()
                    det_time = inf_end - inf_start

                    # Parse detection results of the current request
                    res = self.exec_net.requests[cur_request_id].outputs[self.out_blob]
                    predictions = [
                        self.process_prediction(frame, pred)
                        for pred in res[0][0] if self.check_threshold(pred[2])
                    ]
                    self.robot_controller.process_visual_data(predictions, frame)

                    # Display frame
                    self.process_frame(frame)
                    # TODO: Fix live stream
                    # threading.Thread(target=self.process_frame, args=(frame,)).start()

                # Swap async request identifiers
                cur_request_id, next_request_id = next_request_id, cur_request_id
                frame = next_frame

                # Enable key detection in output window
                key = cv2.waitKey(1)

                # Check if ESC has been pressed
                if key == 27:
                    self.cleanup()
                    break
            # Catch ctrl+c while in headless mode
            except KeyboardInterrupt:
                self.cleanup()
                break

    def get_frame(self):
        """
        Returns a single frame from video capture, base64-encoded as JPEG.
        :return: Single frame
        """
        _, frame = self.cap.read()
        # imencode returns (ret, buffer); only the buffer gets encoded
        return base64.b64encode(cv2.imencode(".jpg", frame)[1])

    def process_frame(self, frame):
        """
        Based on constructor parameters, displays and/or sends frame through websocket.
        :param frame: Frame to be processed
        :return:
        """
        self.draw_info(frame)

        # Send frame if specified
        if self.live_stream:
            log.info("Sending frame...")
            self.ws.send(base64.b64encode(cv2.imencode(".jpg", frame)[1]))

        # Display frame if specified
        if not self.is_headless:
            render_start = time.time()
            cv2.imshow("Detection Results", frame)
            render_end = time.time()
            self.render_time = render_end - render_start

        if self.save_video:
            n = str(self.frame_counter).zfill(10)
            cv2.imwrite("/home/student/capture/frame_" + n + ".jpg", frame)
            self.frame_counter = self.frame_counter + 1

    def draw_info(self, frame):
        now = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
        state = self.robot_controller.get_state()

        # Draw title/logo
        cv2.putText(frame, "GrowBot Vision System", (25, 25),
                    cv2.FONT_HERSHEY_DUPLEX, .75, (0, 150, 0), 1, cv2.LINE_AA)
        # Draw current date
        cv2.putText(frame, now, (25, 50), cv2.FONT_HERSHEY_DUPLEX, .5,
                    (0, 150, 0), 1, cv2.LINE_AA)
        # Draw state
        cv2.putText(frame, state, (25, 75), cv2.FONT_HERSHEY_DUPLEX, .5,
                    (0, 150, 0), 1, cv2.LINE_AA)

    def visualise_prediction(self, frame, pred_boxpts, label, prob):
        """
        Draws bounding box and class probability around prediction.
        :param frame: Frame that contains prediction
        :param pred_boxpts: Bounding box coordinates
        :param label: Class label
        :param prob: Class probability
        :return:
        """
        # Draw bounding box and class label
        color = (0, 255, 0) if label == "Plant" else (0, 0, 255)
        cv2.rectangle(frame, pred_boxpts[0], pred_boxpts[1], color, 2)
        cv2.putText(frame, label + ' ' + str(round(prob * 100, 1)) + ' %',
                    (pred_boxpts[0][0], pred_boxpts[0][1] - 7),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, color, 1)

        if self.draw_alignment_info:
            # Draw triangle in the centre of the frame.
            frame_centre = 320
            pts_centre = np.array(
                [[frame_centre - 10, 480], [frame_centre, 430],
                 [frame_centre + 10, 480]], np.int32).reshape((-1, 1, 2))
            cv2.polylines(frame, [pts_centre], True, (255, 0, 0))

            if label == "Plant":  # `is` was used for string comparison; == is correct
                # Draw triangle indicating midpoint of the bounding box.
                ((xmin, ymin), (xmax, ymax)) = pred_boxpts
                midpoint = (xmax + xmin) / 2
                pts_bb_midpoint = np.array(
                    [[midpoint - 10, 480], [midpoint, 430],
                     [midpoint + 10, 480]], np.int32).reshape((-1, 1, 2))
                cv2.polylines(frame, [pts_bb_midpoint], True, (0, 255, 0))

                # Draw centre acceptance interval.
                delta = min(
                    120,
                    int(6 / (((xmax - xmin) * (ymax - ymin)) / (640 * 480))))
                cv2.rectangle(frame, (320 - delta, 0), (320 + delta, 480),
                              (153, 255, 255), 1)

    def process_prediction(self, frame, prediction):
        """
        Helper function responsible for bounding box extraction, labelling and data visualization.
        :param frame: Frame that contains prediction
        :param prediction: Actual prediction produced by the VPU
        :return: Triple that contains class label, class probability and prediction bounding boxes
        """
        # Extract bounding box coordinates in the format (xmin, ymin), (xmax, ymax)
        pred_boxpts = ((int(prediction[3] * self.initial_w),
                        int(prediction[4] * self.initial_h)),
                       (int(prediction[5] * self.initial_w),
                        int(prediction[6] * self.initial_h)))

        # Set class label
        label = 'Plant' if int(prediction[1]) == 16 else 'Obstacle'

        if label == 'Plant':
            log.info(
                "Prediction: {0}, confidence={1:.10f}, boxpoints={2}".format(
                    label, round(prediction[2], 4), pred_boxpts))

        # Draw bounding box and class label with its probability
        self.visualise_prediction(frame, pred_boxpts, label, prediction[2])
        return label, prediction[2], pred_boxpts

    def check_threshold(self, probability):
        """
        Validate and check probability of a prediction.
        :param probability: Class probability
        :return: True if probability is not NaN and is within (confidence_interval, 1]
        """
        return (not math.isnan(probability)
                ) and 1 >= probability > self.confidence_interval

    def cleanup(self):
        """
        Performs cleanup before termination.
        :return:
        """
        self.cap.release()
        if self.live_stream:
            self.ws.close()
        if not self.is_headless:
            cv2.destroyAllWindows()
        self.fps.stop()
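# A minimal sketch of wiring up the Vision class above. The model paths and
# the RobotController stub are hypothetical; the real controller must at
# least provide process_visual_data() and get_state().
class RobotControllerStub:
    def process_visual_data(self, predictions, frame):
        pass  # a real controller would act on the detections here

    def get_state(self):
        return "IDLE"


vision = Vision(model_xml="models/mobilenet-ssd.xml",   # hypothetical path
                model_bin="models/mobilenet-ssd.bin",   # hypothetical path
                robot_controller=RobotControllerStub(),
                is_headless=False,
                live_stream=False)
vision.start()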
# import the necessary packages
from __future__ import print_function
from imutils.video import WebcamVideoStream
from imutils.video import FPS
import argparse
import imutils
import cv2
import numpy as np

# start the threaded video stream and the FPS counter
# (FPS is a class: instantiate it before calling start())
vs = WebcamVideoStream(src=0).start()
fps = FPS().start()

while True:
    # capture frame-by-frame
    frame = vs.read()
    fps.update()

    # display the resulting frame
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# when everything is done, stop the stream and report throughput
# (WebcamVideoStream exposes stop(), not release())
fps.stop()
print("approx. FPS: {:.2f}".format(fps.fps()))
vs.stop()
cv2.destroyAllWindows()
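# The snippet above runs until a key press. A hedged benchmarking variant
# that samples a fixed number of frames, following the counter's intended
# update/stop/report pattern from imutils; NUM_FRAMES is an assumed sample size.
from imutils.video import WebcamVideoStream, FPS

NUM_FRAMES = 100

vs = WebcamVideoStream(src=0).start()
fps = FPS().start()
for _ in range(NUM_FRAMES):
    frame = vs.read()  # grab the latest frame from the polling thread
    fps.update()       # count it toward the measurement
fps.stop()
print("elapsed: {:.2f}s".format(fps.elapsed()))
print("approx. FPS: {:.2f}".format(fps.fps()))
vs.stop()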
def receive_stream(self, display=True):
    """
    Displays the received stream in a window if no arguments are passed.
    Keeps updating the 'current_frame' attribute with the most recent frame,
    which can be accessed using 'self.current_frame'.
    :param display: boolean, if False no stream output will be displayed.
    :return: None
    """
    self.keep_running = True
    fps = FPS()
    fps.start()
    initBB = None
    pastBB = None
    ite_counter = 0
    # Start at 0.0 so the "{:.2f}" overlay format below never sees None.
    overall_confidence = 0.0
    success = None
    tracker = cv2.TrackerMedianFlow_create()
    grab = False
    while self.footage_socket and self.keep_running:
        try:
            client_response = []
            f = self.footage_socket.recv_string()
            self.current_frame = string_to_image(f)
            if display:
                frame = self.current_frame
                frame = rotate_bound(frame, 90)
                (H, W) = frame.shape[:2]
                if initBB is not None:
                    (success, box) = tracker.update(frame)
                    if success:
                        (x, y, w, h) = [int(v) for v in box]
                        cv2.rectangle(frame, (x, y), (x + w, y + h),
                                      (255, 0, 0), 2)
                        center = (x + w / 2, y + h / 2)
                        try:
                            # `z` is not defined in this scope; fall back
                            # to a 2D offset when the lookup fails.
                            offset = (W / 2 - center[0], H / 2 - center[1], z)
                        except NameError:
                            offset = (W / 2 - center[0], H / 2 - center[1])
                        old_center = (pastBB[0] + pastBB[2] / 2,
                                      pastBB[1] + pastBB[3] / 2)
                        old_offset = (W / 2 - old_center[0],
                                      H / 2 - old_center[1])
                        offset_w = 5 * float(W / w)
                        offset_h = 2 * float(H / h)
                        if float(W * H) / float(w * h) > 1.2:
                            try:
                                if offset[0] > offset_w and self.max > 0:  # right
                                    client_response.append('2')
                                    self.max = self.max - 1
                                elif offset[0] < -offset_w and self.max < 70:  # left
                                    client_response.append('8')
                                    self.max = self.max + 1
                            except ValueError:
                                pass
                            try:
                                if offset[1] > offset_h:  # up
                                    client_response.append("6")
                                elif offset[1] < -offset_h:  # down
                                    client_response.append("4")
                            except ValueError:
                                pass
                        else:
                            grab = True
                        pastBB = (x, y, w, h)
                        if ite_counter > 20:
                            ite_counter = 0
                            try:
                                # Re-verify the tracked region with the detector
                                # every 20 iterations.
                                target = frame[y:y + h, x:x + w]
                                blob = cv2.dnn.blobFromImage(
                                    cv2.resize(target, (300, 300)), 0.007843,
                                    (300, 300), 127.5)
                                net.setInput(blob)
                                detections = net.forward()
                                for i in np.arange(0, detections.shape[2]):
                                    confidence = detections[0, 0, i, 2]
                                    if confidence > 0.8:
                                        idx = int(detections[0, 0, i, 1])
                                        box = detections[0, 0, i, 3:7] * \
                                            np.array([w, h, w, h])
                                        (startX, startY, endX,
                                         endY) = box.astype("int")
                                        if CLASSES[idx] != TARGET_CLASS:
                                            initBB = None
                                            tracker = cv2.TrackerMedianFlow_create()
                                        else:
                                            if (float(abs(startX - endX) *
                                                      abs(startY - endY)) /
                                                    float(w * h) < 0.8):
                                                initBB = None
                                                tracker = cv2.TrackerMedianFlow_create()
                                            else:
                                                overall_confidence = confidence
                                    else:
                                        initBB = None
                                        tracker = cv2.TrackerMedianFlow_create()
                            except:
                                initBB = None
                                tracker = cv2.TrackerMedianFlow_create()
                        else:
                            ite_counter += 1
                    else:
                        initBB = None
                        tracker = cv2.TrackerMedianFlow_create()
                else:
                    # No box yet: run the detector on the whole frame to
                    # (re)initialize the tracker.
                    blob = cv2.dnn.blobFromImage(
                        cv2.resize(frame, (300, 300)), 0.007843, (300, 300),
                        127.5)
                    net.setInput(blob)
                    detections = net.forward()
                    for i in np.arange(0, detections.shape[2]):
                        confidence = detections[0, 0, i, 2]
                        if confidence > 0.8:
                            idx = int(detections[0, 0, i, 1])
                            box = detections[0, 0, i, 3:7] * \
                                np.array([W, H, W, H])
                            (startX, startY, endX, endY) = box.astype("int")
                            if CLASSES[idx] != TARGET_CLASS:
                                overall_confidence = 0
                            else:
                                overall_confidence = confidence
                                initBB = (startX, startY,
                                          abs(endX - startX),
                                          abs(endY - startY))
                                pastBB = initBB
                                tracker.init(frame, initBB)
                        else:
                            overall_confidence = 0
                fps.update()
                fps.stop()
                info = [
                    ("Tracker", "medianflow"),
                    ("Track success", "Yes" if success else "No"),
                    ("FPS", "{:.2f}".format(fps.fps())),
                    ("Object", TARGET_CLASS),
                    ("Confidence", "{:.2f}".format(overall_confidence)),
                ]
                for (i, (k, v)) in enumerate(info):
                    text = "{}: {}".format(k, v)
                    cv2.putText(frame, text, (10, H - ((i * 20) + 20)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
                cv2.imshow("Object Tracker: {}".format(TARGET_CLASS), frame)
                key = cv2.waitKey(1)
                if not grab:
                    # Pad the response to two fields before sending.
                    for i in range(len(client_response), 2):
                        client_response.append('0')
                    self.footage_socket.send_string(','.join(client_response))
                else:
                    self.footage_socket.send_string('1')
                    self.stop()
        except KeyboardInterrupt:
            cv2.destroyAllWindows()
            break
    print("Streaming Stopped!")
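# receive_stream() above depends on a string_to_image() helper. A minimal
# sketch of what such a helper typically does for a base64-encoded JPEG sent
# over the zmq socket; this is an assumption, not necessarily the project's
# exact implementation.
import base64

import cv2
import numpy as np


def string_to_image(string: str) -> np.ndarray:
    # base64 string -> raw JPEG bytes -> byte buffer -> BGR image
    img_bytes = base64.b64decode(string)
    buf = np.frombuffer(img_bytes, dtype=np.uint8)
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)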
class Main:
    def __init__(self, device):
        self.device = device
        print("Starting pipeline...")
        self.device.startPipeline()
        if camera:
            self.cam_out = self.device.getOutputQueue("cam_out")
        else:
            self.face_in = self.device.getInputQueue("face_in")
        if not camera:
            self.cap = cv2.VideoCapture(
                str(Path(args.video).resolve().absolute()))
        self.frame = None
        self.face_box_q = queue.Queue()
        self.bboxes = []
        self.left_bbox = None
        self.right_bbox = None
        self.nose = None
        self.pose = None
        self.gaze = None
        self.running = True
        self.fps = FPS()
        self.fps.start()

    def face_thread(self):
        face_nn = self.device.getOutputQueue("face_nn")
        landmark_in = self.device.getInputQueue("landmark_in")
        pose_in = self.device.getInputQueue("pose_in")
        while self.running:
            if self.frame is None:
                continue
            try:
                bboxes = np.array(face_nn.get().getFirstLayerFp16())
            except RuntimeError:
                continue
            bboxes = bboxes.reshape((bboxes.size // 7, 7))
            self.bboxes = bboxes[bboxes[:, 2] > 0.7][:, 3:7]
            for raw_bbox in self.bboxes:
                bbox = frame_norm(self.frame, raw_bbox)
                det_frame = self.frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]
                land_data = depthai.NNData()
                land_data.setLayer("0", to_planar(det_frame, (48, 48)))
                landmark_in.send(land_data)
                pose_data = depthai.NNData()
                pose_data.setLayer("data", to_planar(det_frame, (60, 60)))
                pose_in.send(pose_data)
                self.face_box_q.put(bbox)

    def land_pose_thread(self):
        landmark_nn = self.device.getOutputQueue(name="landmark_nn",
                                                 maxSize=1, blocking=False)
        pose_nn = self.device.getOutputQueue(name="pose_nn",
                                             maxSize=1, blocking=False)
        gaze_in = self.device.getInputQueue("gaze_in")
        while self.running:
            try:
                land_in = landmark_nn.get().getFirstLayerFp16()
            except RuntimeError:
                continue
            try:
                face_bbox = self.face_box_q.get(block=True, timeout=100)
            except queue.Empty:
                continue
            self.face_box_q.task_done()
            left = face_bbox[0]
            top = face_bbox[1]
            face_frame = self.frame[face_bbox[1]:face_bbox[3],
                                    face_bbox[0]:face_bbox[2]]
            land_data = frame_norm(face_frame, land_in)
            land_data[::2] += left
            land_data[1::2] += top
            left_bbox = padded_point(land_data[:2], padding=30,
                                     frame_shape=self.frame.shape)
            if left_bbox is None:
                print("Point for left eye is corrupted, skipping nn result...")
                continue
            self.left_bbox = left_bbox
            right_bbox = padded_point(land_data[2:4], padding=30,
                                      frame_shape=self.frame.shape)
            if right_bbox is None:
                print("Point for right eye is corrupted, skipping nn result...")
                continue
            self.right_bbox = right_bbox
            self.nose = land_data[4:6]
            left_img = self.frame[self.left_bbox[1]:self.left_bbox[3],
                                  self.left_bbox[0]:self.left_bbox[2]]
            right_img = self.frame[self.right_bbox[1]:self.right_bbox[3],
                                   self.right_bbox[0]:self.right_bbox[2]]
            try:
                self.pose = [
                    val[0][0]
                    for val in to_tensor_result(pose_nn.get()).values()
                ]
            except RuntimeError:
                continue
            gaze_data = depthai.NNData()
            gaze_data.setLayer("left_eye_image", to_planar(left_img, (60, 60)))
            gaze_data.setLayer("right_eye_image", to_planar(right_img, (60, 60)))
            gaze_data.setLayer("head_pose_angles", self.pose)
            gaze_in.send(gaze_data)

    def gaze_thread(self):
        gaze_nn = self.device.getOutputQueue("gaze_nn")
        while self.running:
            try:
                self.gaze = np.array(gaze_nn.get().getFirstLayerFp16())
            except RuntimeError:
                continue

    def should_run(self):
        if self.running:
            return True if camera else self.cap.isOpened()
        else:
            return False

    def get_frame(self, retries=0):
        if camera:
            return True, np.array(self.cam_out.get().getData()).reshape(
                (3, 300, 300)).transpose(1, 2, 0).astype(np.uint8)
        else:
            read_correctly, new_frame = self.cap.read()
            if not read_correctly or new_frame is None:
                if retries < 5:
                    return self.get_frame(retries + 1)
                else:
                    print("Source closed, terminating...")
                    return False, None
            else:
                return read_correctly, new_frame

    def run(self):
        self.threads = [
            threading.Thread(target=self.face_thread),
            threading.Thread(target=self.land_pose_thread),
            threading.Thread(target=self.gaze_thread)
        ]
        for thread in self.threads:
            thread.start()

        while self.should_run():
            try:
                read_correctly, new_frame = self.get_frame()
            except RuntimeError:
                continue

            if not read_correctly:
                break

            self.fps.update()
            self.frame = new_frame
            self.debug_frame = self.frame.copy()

            if not camera:
                nn_data = depthai.NNData()
                nn_data.setLayer("data", to_planar(self.frame, (300, 300)))
                self.face_in.send(nn_data)

            if debug:  # face
                if self.gaze is not None and self.left_bbox is not None \
                        and self.right_bbox is not None:
                    re_x = (self.right_bbox[0] + self.right_bbox[2]) // 2
                    re_y = (self.right_bbox[1] + self.right_bbox[3]) // 2
                    le_x = (self.left_bbox[0] + self.left_bbox[2]) // 2
                    le_y = (self.left_bbox[1] + self.left_bbox[3]) // 2
                    x, y = (self.gaze * 100).astype(int)[:2]
                    if args.lazer:
                        beam_img = np.zeros(self.debug_frame.shape, np.uint8)
                        for t in range(10)[::-2]:
                            cv2.line(beam_img, (re_x, re_y),
                                     ((re_x + x * 100), (re_y - y * 100)),
                                     (0, 0, 255 - t * 10), t * 2)
                            cv2.line(beam_img, (le_x, le_y),
                                     ((le_x + x * 100), (le_y - y * 100)),
                                     (0, 0, 255 - t * 10), t * 2)
                        self.debug_frame |= beam_img
                    else:
                        cv2.arrowedLine(self.debug_frame, (le_x, le_y),
                                        (le_x + x, le_y - y), (255, 0, 255), 3)
                        cv2.arrowedLine(self.debug_frame, (re_x, re_y),
                                        (re_x + x, re_y - y), (255, 0, 255), 3)
                if not args.lazer:
                    for raw_bbox in self.bboxes:
                        bbox = frame_norm(self.frame, raw_bbox)
                        cv2.rectangle(self.debug_frame, (bbox[0], bbox[1]),
                                      (bbox[2], bbox[3]), (10, 245, 10), 2)
                    if self.nose is not None:
                        cv2.circle(self.debug_frame,
                                   (self.nose[0], self.nose[1]), 2,
                                   (0, 255, 0), thickness=5, lineType=8,
                                   shift=0)
                    if self.left_bbox is not None:
                        cv2.rectangle(self.debug_frame,
                                      (self.left_bbox[0], self.left_bbox[1]),
                                      (self.left_bbox[2], self.left_bbox[3]),
                                      (245, 10, 10), 2)
                    if self.right_bbox is not None:
                        cv2.rectangle(self.debug_frame,
                                      (self.right_bbox[0], self.right_bbox[1]),
                                      (self.right_bbox[2], self.right_bbox[3]),
                                      (245, 10, 10), 2)
                    if self.pose is not None and self.nose is not None:
                        # Publish the transform to ROS tf
                        publish_transform(self.pose)
                        draw_3d_axis(self.debug_frame, self.pose, self.nose)
                if camera:
                    cv2.imshow("Camera view", self.debug_frame)
                else:
                    aspect_ratio = self.frame.shape[1] / self.frame.shape[0]
                    cv2.imshow(
                        "Video view",
                        cv2.resize(self.debug_frame,
                                   (int(900), int(900 / aspect_ratio))))
                if cv2.waitKey(1) == ord('q'):
                    cv2.destroyAllWindows()
                    break

        self.fps.stop()
        print("FPS: {:.2f}".format(self.fps.fps()))
        if not camera:
            self.cap.release()
        cv2.destroyAllWindows()
        for i in range(1, 5):  # https://stackoverflow.com/a/25794701/5494277
            cv2.waitKey(1)
        self.running = False
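# The threads above feed depthai.NNData layers via a to_planar() helper.
# A minimal sketch of what that helper usually does in these demos (an
# assumption, not necessarily this project's exact implementation):
import cv2
import numpy as np


def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
    # Resize to the network input size, then reorder HWC -> CHW, the
    # planar layout NNData.setLayer() expects, and flatten.
    return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()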
class Main:
    def __init__(self, file=None, camera=False):
        print("Loading pipeline...")
        self.file = file
        self.camera = camera
        self.create_pipeline()
        self.start_pipeline()
        self.COUNTER = 0
        self.mCOUNTER = 0
        self.hCOUNTER = 0
        self.TOTAL = 0
        self.mTOTAL = 0
        self.hTOTAL = 0
        self.fps = FPS()
        self.fps.start()

    def create_pipeline(self):
        print("Creating pipeline...")
        self.pipeline = depthai.Pipeline()
        if self.camera:
            print("Creating Color Camera...")
            cam = self.pipeline.createColorCamera()
            cam.setPreviewSize(300, 300)
            cam.setResolution(
                depthai.ColorCameraProperties.SensorResolution.THE_1080_P)
            cam.setInterleaved(False)
            cam.setCamId(0)
            cam_xout = self.pipeline.createXLinkOut()
            cam_xout.setStreamName("cam_out")
            cam.preview.link(cam_xout.input)
        self.models("models/face-detection-retail-0004.blob", "face")
        self.models("models/face_landmark_160x160.blob", "land68")

    def models(self, model_path, name):
        print(f"Start creating the {model_path} neural network")
        model_in = self.pipeline.createXLinkIn()
        model_in.setStreamName(f"{name}_in")
        model_nn = self.pipeline.createNeuralNetwork()
        model_nn.setBlobPath(str(Path(model_path).resolve().absolute()))
        model_nn_xout = self.pipeline.createXLinkOut()
        model_nn_xout.setStreamName(f"{name}_nn")
        model_in.out.link(model_nn.input)
        model_nn.out.link(model_nn_xout.input)

    def start_pipeline(self):
        self.device = depthai.Device(self.pipeline, usb2Mode=True)
        print("Starting pipeline...")
        self.device.startPipeline()
        self.face_in = self.device.getInputQueue("face_in")
        self.face_nn = self.device.getOutputQueue("face_nn")
        self.land68_in = self.device.getInputQueue("land68_in")
        self.land68_nn = self.device.getOutputQueue("land68_nn")
        if self.camera:
            self.cam_out = self.device.getOutputQueue("cam_out", 1, True)

    def full_frame_cords(self, cords):
        original_cords = self.face_coords[0]
        return [
            original_cords[0 if i % 2 == 0 else 1] + val
            for i, val in enumerate(cords)
        ]

    def full_frame_bbox(self, bbox):
        relative_cords = self.full_frame_cords(bbox)
        height, width = self.frame.shape[:2]
        y_min = max(0, relative_cords[1])
        y_max = min(height, relative_cords[3])
        x_min = max(0, relative_cords[0])
        x_max = min(width, relative_cords[2])
        result_frame = self.frame[y_min:y_max, x_min:x_max]
        return result_frame, relative_cords

    def draw_bbox(self, bbox, color):
        cv2.rectangle(self.debug_frame, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]), color, 2)

    def run_face(self):
        nn_data = run_nn(self.face_in, self.face_nn,
                         {"data": to_planar(self.frame, (300, 300))})
        results = to_bbox_result(nn_data)
        self.face_coords = [
            frame_norm(self.frame, *obj[3:7]) for obj in results
            if obj[2] > 0.4
        ]
        if len(self.face_coords) == 0:
            return False
        for face_coord in self.face_coords:
            face_coord[0] -= 15
            face_coord[1] -= 15
            face_coord[2] += 15
            face_coord[3] += 15
        self.face_frame = [
            self.frame[face_coord[1]:face_coord[3],
                       face_coord[0]:face_coord[2]]
            for face_coord in self.face_coords
        ]
        if debug:
            for bbox in self.face_coords:
                self.draw_bbox(bbox, (10, 245, 10))
        return True

    def run_land68(self, face_frame, count):
        try:
            nn_data = run_nn(self.land68_in, self.land68_nn,
                             {"data": to_planar(face_frame, (160, 160))})
            out = to_nn_result(nn_data)
            result = frame_norm(face_frame, *out)
            eye_left = []
            eye_right = []
            mouth = []
            hand_points = []
            for i in range(72, 84, 2):
                # cv2.circle(self.debug_frame, (result[i] + self.face_coords[count][0], result[i + 1] + self.face_coords[count][1]), 2, (255, 0, 0), thickness=1, lineType=8, shift=0)
                eye_left.append((out[i], out[i + 1]))
            for i in range(84, 96, 2):
                # cv2.circle(self.debug_frame, (result[i] + self.face_coords[count][0], result[i + 1] + self.face_coords[count][1]), 2, (255, 0, 0), thickness=1, lineType=8, shift=0)
                eye_right.append((out[i], out[i + 1]))
            for i in range(96, len(result), 2):
                # cv2.circle(self.debug_frame, (result[i] + self.face_coords[count][0], result[i + 1] + self.face_coords[count][1]), 2, (255, 0, 0), thickness=1, lineType=8, shift=0)
                if i == 100 or i == 116 or i == 104 or i == 112 or i == 96 or i == 108:
                    # cv2.circle(self.debug_frame, (result[i] + self.face_coords[count][0], result[i + 1] + self.face_coords[count][1]), 2, (255, 0, 0), thickness=1, lineType=8, shift=0)
                    mouth.append(np.array([out[i], out[i + 1]]))
            for i in range(16, 110, 2):
                if i == 16 or i == 60 or i == 72 or i == 90 or i == 96 or i == 108:
                    cv2.circle(self.debug_frame,
                               (result[i] + self.face_coords[count][0],
                                result[i + 1] + self.face_coords[count][1]),
                               2, (255, 0, 0), thickness=1, lineType=8,
                               shift=0)
                    hand_points.append(
                        (result[i] + self.face_coords[count][0],
                         result[i + 1] + self.face_coords[count][1]))
            ret, rotation_vector, translation_vector, camera_matrix, dist_coeffs = \
                get_pose_estimation(self.frame.shape,
                                    np.array(hand_points, dtype='double'))
            ret, pitch, yaw, roll = get_euler_angle(rotation_vector)
            (nose_end_point2D, jacobian) = cv2.projectPoints(
                np.array([(0.0, 0.0, 1000.0)]), rotation_vector,
                translation_vector, camera_matrix, dist_coeffs)
            p1 = (int(hand_points[1][0]), int(hand_points[1][1]))
            p2 = (int(nose_end_point2D[0][0][0]),
                  int(nose_end_point2D[0][0][1]))
            cv2.line(self.debug_frame, p1, p2, (255, 0, 0), 2)
            euler_angle_str = 'Y:{}, X:{}, Z:{}'.format(pitch, yaw, roll)
            cv2.putText(self.debug_frame, euler_angle_str, (10, 20),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, (0, 0, 255))
            # print(euler_angle_str)
            if pitch < 0:
                self.hCOUNTER += 1
                if self.hCOUNTER >= 20:
                    cv2.putText(self.debug_frame, "SLEEP!!!",
                                (self.face_coords[count][0],
                                 self.face_coords[count][1] - 10),
                                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 3)
            else:
                if self.hCOUNTER >= 3:
                    self.hTOTAL += 1
                self.hCOUNTER = 0
            left_ear = self.eye_aspect_ratio(eye_left)
            right_ear = self.eye_aspect_ratio(eye_right)
            ear = (left_ear + right_ear) / 2.0
            if ear < 0.15:  # eye aspect ratio threshold: 0.15
                self.COUNTER += 1
                if self.COUNTER >= 20:
                    cv2.putText(self.debug_frame, "SLEEP!!!",
                                (self.face_coords[count][0],
                                 self.face_coords[count][1] - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
            else:
                # Staying below the threshold for 3 consecutive frames
                # counts as one blink.
                if self.COUNTER >= 3:  # threshold: 3
                    self.TOTAL += 1
                # Reset the eye frame counter.
                self.COUNTER = 0
            mouth_ratio = self.mouth_aspect_ratio(mouth)
            if mouth_ratio > 0.5:
                self.mCOUNTER += 1
            else:
                if self.mCOUNTER >= 3:
                    self.mTOTAL += 1
                self.mCOUNTER = 0
            # print(self.TOTAL, self.mTOTAL, self.hTOTAL)
            cv2.putText(
                self.debug_frame,
                "eye:{:d},mouth:{:d},head:{:d}".format(
                    self.TOTAL, self.mTOTAL, self.hTOTAL),
                (10, 40), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, (255, 0, 0))
            if self.TOTAL >= 50 or self.mTOTAL >= 15 or self.hTOTAL >= 10:
                cv2.putText(self.debug_frame, "Danger!!!", (100, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
        except:
            pass

    def mouth_aspect_ratio(self, mouth):
        A = np.linalg.norm(mouth[1] - mouth[5])  # 51, 59
        B = np.linalg.norm(mouth[2] - mouth[4])  # 53, 57
        C = np.linalg.norm(mouth[0] - mouth[3])  # 49, 55
        mar = (A + B) / (2.0 * C)
        return mar

    def eye_aspect_ratio(self, eye):
        A = euclidean(eye[1], eye[5])
        B = euclidean(eye[2], eye[4])
        C = euclidean(eye[0], eye[3])
        return (A + B) / (2.0 * C)

    def parse(self):
        if debug:
            self.debug_frame = self.frame.copy()
        face_success = self.run_face()
        if face_success:
            for i in range(len(self.face_frame)):
                self.run_land68(self.face_frame[i], i)
        if debug:
            aspect_ratio = self.frame.shape[1] / self.frame.shape[0]
            cv2.imshow(
                "Camera_view",
                cv2.resize(self.debug_frame,
                           (int(900), int(900 / aspect_ratio))))
            if cv2.waitKey(1) == ord('q'):
                cv2.destroyAllWindows()
                raise StopIteration()

    def run_video(self):
        cap = cv2.VideoCapture(str(Path(self.file).resolve().absolute()))
        while cap.isOpened():
            read_correctly, self.frame = cap.read()
            if not read_correctly:
                break
            try:
                self.parse()
            except StopIteration:
                break
        cap.release()

    def run_camera(self):
        while True:
            self.frame = np.array(self.cam_out.get().getData()).reshape(
                (3, 300, 300)).transpose(1, 2, 0).astype(np.uint8)
            self.fps.update()
            try:
                self.parse()
            except StopIteration:
                break

    def run(self):
        if self.file is not None:
            self.run_video()
        else:
            self.run_camera()
        self.fps.stop()
        print("FPS: {:.2f}".format(self.fps.fps()))
        del self.device
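# A hypothetical entry point for the class above; the original module
# presumably defines `debug` and the helper functions at top level, and
# the flag names here are assumptions.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("-vid", "--video", type=str,
                        help="path to a video file")
    parser.add_argument("-cam", "--camera", action="store_true",
                        help="use the DepthAI color camera instead of a file")
    args = parser.parse_args()

    Main(file=args.video, camera=args.camera).run()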
class VideoPlayer(QtCore.QObject):
    def __init__(self, ball_tracker: BallTracker) -> None:
        """Creates an instance of this class that contains properties used by
        the video player to display frames processed by the ball tracker"""
        super().__init__()
        self.ball_tracker = ball_tracker or BallTracker()
        self.video_processor_lock = threading.Lock()
        self.video_processor_stop_event = threading.Event()
        self.video_processor: VideoProcessor | None = None
        self.video_file_stream: VideoFileStream | None = None
        self.video_file: str | None = None
        self._width = 1100
        self._height = 600
        self._play = False
        self._crop_frames = False
        self._show_threshold = False
        self._perform_morph = False
        self._detect_table = False
        self._queue_size = 0
        self._fps = FPS()
        self._output_frame = np.array([])
        self._hsv_frame = np.array([])

    widthChanged = QtCore.pyqtSignal(int)

    @property
    def width(self) -> int:
        """Width property

        :return: player width
        :rtype: int
        """
        return self._width

    @width.setter
    def width(self, value: int) -> None:
        """Width setter

        :param value: value to set
        :type value: int
        """
        self._width = value
        self.widthChanged.emit(self._width)

    heightChanged = QtCore.pyqtSignal(int)

    @property
    def height(self) -> int:
        """Height property

        :return: player height
        :rtype: int
        """
        return self._height

    @height.setter
    def height(self, value: int) -> None:
        """Height setter

        :param value: value to set
        :type value: int
        """
        self._height = value
        self.heightChanged.emit(self._height)

    playChanged = QtCore.pyqtSignal(bool)

    @property
    def play(self) -> bool:
        """Play property

        :return: play video
        :rtype: bool
        """
        return self._play

    @play.setter
    def play(self, value: bool) -> None:
        """Play setter

        :param value: value to set
        :type value: bool
        """
        self._play = value
        self.playChanged.emit(self._play)

    crop_framesChanged = QtCore.pyqtSignal(bool)

    @property
    def crop_frames(self) -> bool:
        """Crop frames property

        :return: crop frames
        :rtype: bool
        """
        return self._crop_frames

    @crop_frames.setter
    def crop_frames(self, value: bool) -> None:
        """Crop frames setter

        :param value: value to set
        :type value: bool
        """
        self._crop_frames = value
        self.crop_framesChanged.emit(self._crop_frames)

    show_thresholdChanged = QtCore.pyqtSignal(bool)

    @property
    def show_threshold(self) -> bool:
        """Show threshold property

        :return: show threshold
        :rtype: bool
        """
        return self._show_threshold

    @show_threshold.setter
    def show_threshold(self, value: bool) -> None:
        """Show threshold setter

        :param value: value to set
        :type value: bool
        """
        self._show_threshold = value
        self.show_thresholdChanged.emit(self._show_threshold)

    perform_morphChanged = QtCore.pyqtSignal(bool)

    @property
    def perform_morph(self) -> bool:
        """Perform morph property

        :return: perform morph
        :rtype: bool
        """
        return self._perform_morph

    @perform_morph.setter
    def perform_morph(self, value: bool) -> None:
        """Perform morph setter

        :param value: value to set
        :type value: bool
        """
        self._perform_morph = value
        self.perform_morphChanged.emit(self._perform_morph)

    detect_tableChanged = QtCore.pyqtSignal(bool)

    @property
    def detect_table(self) -> bool:
        """Detect table property

        :return: detect table
        :rtype: bool
        """
        return self._detect_table

    @detect_table.setter
    def detect_table(self, value: bool) -> None:
        """Detect table setter

        :param value: value to set
        :type value: bool
        """
        self._detect_table = value
        self.detect_tableChanged.emit(self._detect_table)

    queue_sizeChanged = QtCore.pyqtSignal(int)

    @property
    def queue_size(self) -> int:
        """Queue size property

        :return: queue size
        :rtype: int
        """
        return self._queue_size

    @queue_size.setter
    def queue_size(self, value: int) -> None:
        """Queue size setter

        :param value: value to set
        :type value: int
        """
        self._queue_size = value
        self.queue_sizeChanged.emit(self._queue_size)

    fpsChanged = QtCore.pyqtSignal(int)

    def start_fps(self) -> None:
        """Start FPS timer"""
        self._fps = FPS()
        self._fps.start()

    def update_fps(self) -> None:
        """Update FPS timer"""
        self._fps.update()

    def stop_fps(self) -> None:
        """Stop FPS timer"""
        self._fps.stop()
        self.fpsChanged.emit(self._fps.fps())

    output_frameChanged = QtCore.pyqtSignal(np.ndarray)

    @property
    def output_frame(self) -> Frame:
        """Frame property

        :return: output frame
        :rtype: np.ndarray
        """
        return self._output_frame

    @output_frame.setter
    def output_frame(self, value: Frame) -> None:
        """Output frame setter

        :param value: value to set
        :type value: np.ndarray
        """
        self._output_frame = value
        self.output_frameChanged.emit(self._output_frame)

    hsv_frameChanged = QtCore.pyqtSignal(np.ndarray)

    @property
    def hsv_frame(self) -> Frame:
        """HSV frame property

        :return: hsv frame
        :rtype: np.ndarray
        """
        return self._hsv_frame

    @hsv_frame.setter
    def hsv_frame(self, value: Frame) -> None:
        """HSV frame setter

        :param value: value to set
        :type value: np.ndarray
        """
        self._hsv_frame = value
        self.hsv_frameChanged.emit(self._hsv_frame)

    def start(self, video_file: str | None = None) -> None:
        """Creates VideoProcessor and VideoFileStream instances to handle
        the selected video file.

        The VideoFileStream is the producer thread and the VideoProcessor is
        the consumer thread: the VideoFileStream instance reads frames from
        the video file and puts them into a queue from which the
        VideoProcessor obtains frames to process. The VideoProcessor then
        passes processed frames to the VideoPlayer to display to the user.

        :param video_file: video file to read from, defaults to None
        :type video_file: str, optional
        :raises TypeError: if `video_file` isn't an actual video file
        """
        if video_file and "video" not in magic.from_file(
                video_file, mime=True):  # type: ignore[no-untyped-call]
            raise TypeError(f"{video_file} is not a video file")
        self.play = False
        self.video_file = video_file or self.video_file
        if self.video_file is None:
            raise ValueError("video_file is not set")
        self.destroy_video_threads()
        self.video_processor_stop_event.clear()
        self.video_file_stream = VideoFileStream(
            self.video_file,
            video_player=self,
            colour_settings=self.ball_tracker.colour_settings,
            queue_size=1,
        )
        self.video_processor = VideoProcessor(
            video_stream=self.video_file_stream,
            video_player=self,
            ball_tracker=self.ball_tracker,
            lock=self.video_processor_lock,
            stop_event=self.video_processor_stop_event,
        )
        self.video_processor.start()

    def restart(self) -> None:
        """Restart the video player by destroying the VideoProcessor and
        VideoFileStream instances and creating new ones before starting
        the video player again."""
        self.start()

    def destroy_video_threads(self) -> None:
        """Destroy the VideoProcessor and VideoFileStream thread instances"""
        if self.video_processor is not None:
            if self.video_file_stream is not None:
                with self.video_processor_lock:
                    self.video_file_stream.stop()
            self.video_processor_stop_event.set()
            self.video_processor.join()
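# A hedged sketch of driving the player above; the module paths and the
# video file name are assumptions based on the identifiers in the class.
from ball_tracker import BallTracker
from video_player import VideoPlayer

player = VideoPlayer(BallTracker())
player.playChanged.connect(lambda playing: print("playing:", playing))
player.start(video_file="snooker.mp4")  # hypothetical file path
player.play = True  # the property setter emits playChanged
# ...later, on shutdown:
player.destroy_video_threads()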