def fire_detection(source, video_path, output, fps, frame_size):
    """Deep-learning-based fire and smoke detector"""
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)
            fire_in = device.getInputQueue("fire_in")
        else:
            cam_out = device.getOutputQueue("rgb")
        fire_nn = device.getOutputQueue("fire_nn")

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        label_map = ["fire", "normal", "smoke"]

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, fire_in, 224, 224)
            fire_data = toTensorResult(fire_nn.get()).get("final_result")
            fps_handler.tick("fire")
            conf = np.max(fire_data)
            if conf > 0.5:
                label = label_map[np.argmax(fire_data)]
                drawText(frame_debug, f"{label}", (10, 30), "black")
                drawText(frame_debug, f"{conf:.2%}", (10, 50), "black")
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))
            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" % time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )

        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
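# NOTE: run_nn, toTensorResult, frameNorm, drawText, FPSHandler and getDeviceInfo are
# project/depthai_sdk-style helpers whose definitions live outside this section. As a
# minimal sketch of what run_nn is assumed to do -- resize the frame, repack it as
# planar CHW data and send it to a device input queue -- see below. The helper names,
# the "data" layer name and the argument order are assumptions, not the project's
# exact implementation (some later examples pass the queue first).
import cv2
import depthai as dai
import numpy as np


def to_planar_sketch(frame: np.ndarray, shape: tuple) -> np.ndarray:
    """Resize an HWC BGR frame to `shape` (width, height) and flatten it to planar CHW order."""
    resized = cv2.resize(frame, shape)
    return resized.transpose(2, 0, 1).flatten()


def run_nn_sketch(frame: np.ndarray, in_queue, width: int, height: int) -> None:
    """Wrap the planar frame in a dai.NNData message and send it to the input queue."""
    nn_data = dai.NNData()
    nn_data.setLayer("data", to_planar_sketch(frame, (width, height)))  # layer name is an assumption
    in_queue.send(nn_data)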
def blink(source, video_path, output, fps, frame_size):
    """Detect and count eye blinks in a video stream in real time"""
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)
            mesh_in = device.getInputQueue("mesh_in")
        else:
            cam_out = device.getOutputQueue("rgb")
        mesh_nn = device.getOutputQueue("mesh_nn")
        eye_in = device.getInputQueue("eye_in")
        eye_nn = device.getOutputQueue("eye_nn")

        left_eye_blink = []
        right_eye_blink = []
        left_number_of_blinks = 0
        right_number_of_blinks = 0

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, mesh_in, 192, 192)
            mesh_data = toTensorResult(mesh_nn.get())
            fps_handler.tick("Mesh")
            score = mesh_data.get("conv2d_31").reshape((1, ))
            if score > 0.5:
                mesh = mesh_data.get("conv2d_21").reshape((468, 3))
                wh = np.array(frame_debug.shape[:2])[::-1]
                mesh *= np.array([*wh / 192, 1])
                for v in mesh.astype(int):
                    cv2.circle(frame_debug, v[:2], 1, (191, 255, 0))

                left_eye, left_box = get_mini_box_frame(
                    frame_debug,
                    np.array([mesh[71], mesh[107], mesh[116], mesh[197]]),
                )
                run_nn(left_eye, eye_in, 32, 32)
                left_eye_blink.append(
                    np.argmax(toTensorResult(eye_nn.get()).get("19")))
                if (len(left_eye_blink) > 5
                        and left_eye_blink[-1] not in left_eye_blink[-5:-1]):
                    left_number_of_blinks += 1
                if len(left_eye_blink) > 20:
                    del left_eye_blink[0]

                right_eye, right_box = get_mini_box_frame(
                    frame_debug,
                    np.array([mesh[301], mesh[336], mesh[345], mesh[197]]),
                )
                run_nn(right_eye, eye_in, 32, 32)
                right_eye_blink.append(
                    np.argmax(toTensorResult(eye_nn.get()).get("19")))
                if (len(right_eye_blink) > 5
                        and right_eye_blink[-1] not in right_eye_blink[-5:-1]):
                    right_number_of_blinks += 1
                if len(right_eye_blink) > 20:
                    del right_eye_blink[0]

                cv2.drawContours(frame_debug, [right_box], -1, (0, 139, 0), 2)
                cv2.drawContours(frame_debug, [left_box], -1, (0, 139, 0), 2)
                cv2.imshow("left", left_eye)
                cv2.imshow("right", right_eye)

            drawText(
                frame_debug,
                f"NumberOfBlinksOfRightEye: {right_number_of_blinks}",
                (20, 50),
                color="red",
            )
            drawText(
                frame_debug,
                f"NumberOfBlinksOfLeftEye: {left_number_of_blinks}",
                (20, 80),
                color="red",
            )
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))
            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" % time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )

        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
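# get_mini_box_frame is assumed to cut out the minimum-area rotated rectangle around a
# small set of face-mesh landmarks (the eye region) and return both the crop fed to the
# open/closed-eye classifier and the box contour used for drawContours. A rough sketch
# of that idea (an assumption, not the project's exact implementation):
import cv2
import numpy as np


def get_mini_box_frame_sketch(frame: np.ndarray, points: np.ndarray):
    """Return (crop, box) for the minimum-area rectangle around 2D landmark points."""
    pts = points[:, :2].astype(np.float32)      # drop the z coordinate of the mesh points
    rect = cv2.minAreaRect(pts)                 # ((cx, cy), (w, h), angle)
    box = cv2.boxPoints(rect).astype(int)       # 4 corner points, usable with drawContours
    x, y, w, h = cv2.boundingRect(box)          # axis-aligned region covering the box
    x, y = max(x, 0), max(y, 0)
    return frame[y:y + h, x:x + w].copy(), box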
class Main:
    def __init__(self):
        print("Loading pipeline...")
        self.start_pipeline()
        if not camera:
            self.cap = cv2.VideoCapture(
                str(Path(args.video).resolve().absolute()))
        self.fps_handler = FPSHandler()
        self.frame = None
        self.face_box_q = queue.Queue()
        self.bboxes = []
        self.running = True
        # Initialise the result queues up front so run() can poll them before
        # the worker threads have produced anything.
        self.results = queue.Queue()
        self.face_bboxs = queue.Queue()

    def start_pipeline(self):
        print("Starting pipeline...")
        self.device = depthai.Device(create_pipeline())
        if camera:
            self.cam_out = self.device.getOutputQueue("cam_out", 1, False)
        else:
            self.face_in = self.device.getInputQueue("face_in")

    def draw_bbox(self, bbox, color):
        cv2.rectangle(self.debug_frame, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]), color, 2)

    def run_face(self):
        face_nn = self.device.getOutputQueue("face_nn")
        land68_in = self.device.getInputQueue("land68_in", 4, False)
        while self.running:
            if self.frame is None:
                continue
            bboxes = np.array(face_nn.get().getFirstLayerFp16())
            bboxes = bboxes.reshape((bboxes.size // 7, 7))
            self.bboxes = bboxes[bboxes[:, 2] > 0.7][:, 3:7]
            for raw_bbox in self.bboxes:
                bbox = frame_norm(self.frame, raw_bbox)
                det_frame = self.frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]
                land68_data = depthai.NNData()
                land68_data.setLayer("data", to_planar(det_frame, (160, 160)))
                land68_in.send(land68_data)
                self.face_box_q.put(bbox)

    def run_land68(self):
        land68_nn = self.device.getOutputQueue("land68_nn", 4, False)
        while self.running:
            self.results = queue.Queue()
            self.face_bboxs = queue.Queue()
            while len(self.bboxes):
                face_bbox = self.face_box_q.get()
                face_bbox[1] -= 15
                face_bbox[3] += 15
                face_bbox[0] -= 15
                face_bbox[2] += 15
                self.face_bboxs.put(face_bbox)
                face_frame = self.frame[face_bbox[1]:face_bbox[3],
                                        face_bbox[0]:face_bbox[2]]
                land68_data = land68_nn.get()
                out = to_tensor_result(land68_data).get(
                    "StatefulPartitionedCall/strided_slice_2/Split.0")
                result = coordinate(face_frame, *out)
                self.results.put(result)

    def should_run(self):
        return True if camera else self.cap.isOpened()

    def get_frame(self, retries=0):
        if camera:
            rgb_output = self.cam_out.get()
            return True, np.array(rgb_output.getData()).reshape(
                (3, rgb_output.getHeight(),
                 rgb_output.getWidth())).transpose(1, 2, 0).astype(np.uint8)
        else:
            read_correctly, new_frame = self.cap.read()
            if not read_correctly or new_frame is None:
                if retries < 5:
                    return self.get_frame(retries + 1)
                else:
                    print("Source closed, terminating...")
                    return False, None
            else:
                return read_correctly, new_frame

    def run(self):
        self.threads = [
            threading.Thread(target=self.run_face, daemon=True),
            threading.Thread(target=self.run_land68, daemon=True),
        ]
        for thread in self.threads:
            thread.start()

        while self.should_run():
            read_correctly, new_frame = self.get_frame()
            if not read_correctly:
                break
            self.fps_handler.tick("Frame")
            self.frame = new_frame
            self.debug_frame = self.frame.copy()
            if not camera:
                nn_data = depthai.NNData()
                nn_data.setLayer("data", to_planar(self.frame, (300, 300)))
                self.face_in.send(nn_data)
            if debug:
                if self.results.qsize() > 0 and self.face_bboxs.qsize() > 0:
                    try:
                        for i in range(self.results.qsize()):
                            face_bbox = self.face_bboxs.get()
                            result = self.results.get()
                            bbox = frame_norm(self.frame, self.bboxes[i])
                            self.draw_bbox(bbox, (0, 255, 0))
                            self.hand_points = []
                            # Landmark indices of the 68-point model used for pose estimation:
                            # 17 left eyebrow upper-left corner / 21 left eyebrow right corner /
                            # 22 right eyebrow upper-left corner / 26 right eyebrow upper-right corner /
                            # 36 left eye upper-left corner / 39 left eye upper-right corner /
                            # 42 right eye upper-left corner / 45 right eye upper-right corner /
                            # 31 nose upper-left corner / 35 nose upper-right corner /
                            # 48 mouth upper-left corner / 54 mouth upper-right corner /
                            # 57 mouth lower-centre corner / 8 chin corner
                            # Each landmark occupies two values (x, y), so the index is multiplied by 2.
                            self.hand_points.append(
                                (result[34] + face_bbox[0], result[35] + face_bbox[1]))
                            self.hand_points.append(
                                (result[42] + face_bbox[0], result[43] + face_bbox[1]))
                            self.hand_points.append(
                                (result[44] + face_bbox[0], result[45] + face_bbox[1]))
                            self.hand_points.append(
                                (result[52] + face_bbox[0], result[53] + face_bbox[1]))
                            self.hand_points.append(
                                (result[72] + face_bbox[0], result[73] + face_bbox[1]))
                            self.hand_points.append(
                                (result[78] + face_bbox[0], result[79] + face_bbox[1]))
                            self.hand_points.append(
                                (result[84] + face_bbox[0], result[85] + face_bbox[1]))
                            self.hand_points.append(
                                (result[90] + face_bbox[0], result[91] + face_bbox[1]))
                            self.hand_points.append(
                                (result[62] + face_bbox[0], result[63] + face_bbox[1]))
                            self.hand_points.append(
                                (result[70] + face_bbox[0], result[71] + face_bbox[1]))
                            self.hand_points.append(
                                (result[96] + face_bbox[0], result[97] + face_bbox[1]))
                            self.hand_points.append(
                                (result[108] + face_bbox[0], result[109] + face_bbox[1]))
                            self.hand_points.append(
                                (result[114] + face_bbox[0], result[115] + face_bbox[1]))
                            self.hand_points.append(
                                (result[16] + face_bbox[0], result[17] + face_bbox[1]))
                            for point in self.hand_points:
                                cv2.circle(
                                    self.debug_frame,
                                    (point[0], point[1]),
                                    2,
                                    (255, 0, 0),
                                    thickness=1,
                                    lineType=8,
                                    shift=0,
                                )
                            reprojectdst, _, pitch, yaw, roll = get_head_pose(
                                np.array(self.hand_points))
                            # pitch > 0: head down,  < 0: look up
                            # yaw   > 0: turn right, < 0: turn left
                            # roll  > 0: tilt right, < 0: tilt left
                            cv2.putText(
                                self.debug_frame,
                                "pitch:{:.2f}, yaw:{:.2f}, roll:{:.2f}".format(
                                    pitch, yaw, roll),
                                (face_bbox[0] - 30, face_bbox[1] - 30),
                                cv2.FONT_HERSHEY_COMPLEX,
                                0.45,
                                (255, 0, 0),
                            )
                            hand_attitude = np.array(
                                [abs(pitch), abs(yaw), abs(roll)])
                            max_index = np.argmax(hand_attitude)
                            if max_index == 0:
                                if pitch > 0:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Head down",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                                else:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Look up",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                            elif max_index == 1:
                                if yaw > 0:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Turn right",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                                else:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Turn left",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                            elif max_index == 2:
                                if roll > 0:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Tilt right",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                                else:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Tilt left",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                            # Draw a cube with 12 edges to visualise the head pose
                            line_pairs = [
                                [0, 1], [1, 2], [2, 3], [3, 0],
                                [4, 5], [5, 6], [6, 7], [7, 4],
                                [0, 4], [1, 5], [2, 6], [3, 7],
                            ]
                            for start, end in line_pairs:
                                cv2.line(
                                    self.debug_frame,
                                    reprojectdst[start],
                                    reprojectdst[end],
                                    (0, 0, 255),
                                )
                    except Exception:
                        pass
                if camera:
                    cv2.imshow("Camera view", self.debug_frame)
                else:
                    aspect_ratio = self.frame.shape[1] / self.frame.shape[0]
                    cv2.imshow(
                        "Video view",
                        cv2.resize(self.debug_frame,
                                   (int(900), int(900 / aspect_ratio))),
                    )
                if cv2.waitKey(1) == ord("q"):
                    cv2.destroyAllWindows()
                    break

        self.fps_handler.printStatus()
        if not camera:
            self.cap.release()
        cv2.destroyAllWindows()
        self.running = False
        for thread in self.threads:
            thread.join(2)
            if thread.is_alive():
                break
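# get_head_pose is assumed to solve a PnP problem between the 14 selected 2D landmarks
# and a matching 3D face model, then derive Euler angles and the reprojected cube
# corners drawn above. A hedged sketch of that pipeline follows; the 3D model points,
# cube corners and camera intrinsics must come from the project, so here they are
# passed in as arguments / approximated, and the function name is hypothetical.
import cv2
import numpy as np


def get_head_pose_sketch(image_pts: np.ndarray, object_pts: np.ndarray,
                         cube_pts: np.ndarray, frame_size=(450, 450)):
    """Return (reprojected cube points, rotation vector, pitch, yaw, roll in degrees)."""
    focal = frame_size[0]                               # crude pinhole approximation
    center = (frame_size[0] / 2, frame_size[1] / 2)
    cam_matrix = np.array([[focal, 0, center[0]],
                           [0, focal, center[1]],
                           [0, 0, 1]], dtype=np.float64)
    dist_coeffs = np.zeros((4, 1))                      # assume no lens distortion
    _, rvec, tvec = cv2.solvePnP(object_pts.astype(np.float64),
                                 image_pts.astype(np.float64),
                                 cam_matrix, dist_coeffs)
    # Reproject a 3D cube so the caller can draw the 12 pose edges
    reproject, _ = cv2.projectPoints(cube_pts.astype(np.float64), rvec, tvec,
                                     cam_matrix, dist_coeffs)
    reproject = reproject.reshape(-1, 2).astype(int)
    # Rotation vector -> rotation matrix -> Euler angles (degrees)
    rmat, _ = cv2.Rodrigues(rvec)
    pose_mat = cv2.hconcat((rmat, tvec))
    _, _, _, _, _, _, euler = cv2.decomposeProjectionMatrix(pose_mat)
    pitch, yaw, roll = euler.flatten()[:3]
    return [tuple(p) for p in reproject], rvec, pitch, yaw, roll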
class DepthAI:
    distance_guardian_class = DistanceGuardianDebug
    distance_bird_class = Bird

    def __init__(self):
        self.frame_size = (672, 384)
        self.create_pipeline()
        self.start_pipeline()
        self.fps_handler = FPSHandler()
        self.distance_guardian = self.distance_guardian_class()
        self.distance_bird = self.distance_bird_class(self.frame_size)

    def create_pipeline(self):
        self.pipeline = dai.Pipeline()

        cam = self.pipeline.createColorCamera()
        cam.setResolution(
            dai.ColorCameraProperties.SensorResolution.THE_1080_P)
        cam.setPreviewSize(self.frame_size[0], self.frame_size[1])
        cam.setInterleaved(False)
        cam.setBoardSocket(dai.CameraBoardSocket.RGB)

        self.cam_xout = self.pipeline.createXLinkOut()
        self.cam_xout.setStreamName("cam_out")

        self.create_models(
            "models/person-detection-retail-0013/pedestrian-detection-adas-0002.blob",
            "model", cam)
        self.model.passthrough.link(self.cam_xout.input)

        self.StereoDepthXLink()
        self.stereo.depth.link(self.model.inputDepth)
        self.model.passthroughDepth.link(self.stereo_out.input)

    def create_models(self, model_path, name, cam=None):
        print(f"Creating the {model_path} neural network...")
        self.model = self.pipeline.createMobileNetSpatialDetectionNetwork()
        self.model.setBlobPath(str(Path(model_path).resolve().absolute()))
        self.model.setConfidenceThreshold(0.5)
        self.model.input.setBlocking(False)
        self.model.setBoundingBoxScaleFactor(0.5)
        self.model.setDepthLowerThreshold(100)
        self.model.setDepthUpperThreshold(5000)
        cam.preview.link(self.model.input)
        self.model_out = self.pipeline.createXLinkOut()
        self.model_out.setStreamName(f"{name}_nn")
        self.model.out.link(self.model_out.input)

    def StereoDepthXLink(self):
        print("Creating the stereo depth node...")
        self.stereo = self.pipeline.createStereoDepth()
        self.stereo.initialConfig.setConfidenceThreshold(230)
        self.stereo_out = self.pipeline.createXLinkOut()
        self.stereo_out.setStreamName("depth")

        mono_left = self.pipeline.createMonoCamera()
        mono_left.setResolution(
            dai.MonoCameraProperties.SensorResolution.THE_400_P)
        mono_left.setBoardSocket(dai.CameraBoardSocket.LEFT)
        mono_right = self.pipeline.createMonoCamera()
        mono_right.setResolution(
            dai.MonoCameraProperties.SensorResolution.THE_400_P)
        mono_right.setBoardSocket(dai.CameraBoardSocket.RIGHT)
        mono_left.out.link(self.stereo.left)
        mono_right.out.link(self.stereo.right)

    def start_pipeline(self):
        print("Starting the device pipeline...")
        self.device = dai.Device(self.pipeline)
        self.camRgb = self.device.getOutputQueue(self.cam_xout.getStreamName(),
                                                 4, False)
        self.person_nn = self.device.getOutputQueue(
            self.model_out.getStreamName(), 4, False)
        self.depth = self.device.getOutputQueue(
            self.stereo_out.getStreamName(), 4, False)

    def draw_bbox(self, bbox, color, detection):
        cv2.rectangle(self.debug_frame, (bbox[0], bbox[1]),
                      (bbox[2], bbox[3]), color, 2)
        cv2.putText(
            self.debug_frame,
            "x: {}m".format(round(detection.spatialCoordinates.x) / 1000),
            (bbox[0], bbox[1] + 30), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.putText(
            self.debug_frame,
            "y: {}m".format(round(detection.spatialCoordinates.y) / 1000),
            (bbox[0], bbox[1] + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.putText(
            self.debug_frame,
            "z: {}m".format(round(detection.spatialCoordinates.z) / 1000),
            (bbox[0], bbox[1] + 70), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.putText(self.debug_frame,
                    "conf: {}".format(round(detection.confidence * 100)),
                    (bbox[0], bbox[1] + 90), cv2.FONT_HERSHEY_TRIPLEX, 0.4,
                    color)

    def run_model(self):
        width = self.frame.shape[1]
        height = self.frame.shape[0]
        nn_data = self.person_nn.get()
        if nn_data is not None:
            boxes = []
            detections = nn_data.detections
            for detection in detections:
                x_min = int(detection.xmin * width)
                y_min = int(detection.ymin * height)
                x_max = int(detection.xmax * width)
                y_max = int(detection.ymax * height)
                bbox = (x_min, y_min, x_max, y_max)
                self.draw_bbox(bbox, (10, 245, 10), detection)
                boxes.append({
                    "id": uuid.uuid4(),
                    "x_min": x_min,
                    "y_min": y_min,
                    "x_max": x_max,
                    "y_max": y_max,
                    "depth_x": detection.spatialCoordinates.x / 1000,
                    "depth_y": detection.spatialCoordinates.y / 1000,
                    "depth_z": detection.spatialCoordinates.z / 1000,
                })
            results = self.distance_guardian.parse_frame(
                self.debug_frame, boxes)
            self.bird_frame = self.distance_bird.parse_frame(
                self.debug_frame, results, boxes)

        depthFrame = self.depth.get().getFrame()
        self.depthFrameColor = cv2.normalize(depthFrame, None, 255, 0,
                                             cv2.NORM_INF, cv2.CV_8UC1)
        self.depthFrameColor = cv2.equalizeHist(self.depthFrameColor)
        self.depthFrameColor = cv2.applyColorMap(self.depthFrameColor,
                                                 cv2.COLORMAP_JET)

    def parse(self):
        if debug:
            self.debug_frame = self.frame.copy()
        self.run_model()
        self.fps_handler.tick("NN")
        if debug:
            numpy_horizontal = np.hstack((self.debug_frame, self.bird_frame))
            cv2.imshow("Frame", numpy_horizontal)
            cv2.imshow("depth", self.depthFrameColor)
        if cv2.waitKey(1) == ord('q'):
            cv2.destroyAllWindows()
            raise StopIteration()

    def run_camera(self):
        while True:
            rgb_in = self.camRgb.get()
            self.frame = rgb_in.getCvFrame()
            try:
                self.parse()
            except StopIteration:
                break

    def run(self):
        self.run_camera()
        self.fps_handler.printStatus()

    def __del__(self):
        del self.pipeline
        del self.device
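# DistanceGuardianDebug and Bird are the project's distance-guardian and bird's-eye-view
# renderers. The guardian is assumed to compare every pair of detections by their spatial
# coordinates and flag pairs that are too close together; a minimal sketch of that idea
# (the threshold value, class name and result format are assumptions):
import itertools
import math


class DistanceGuardianSketch:
    THRESHOLD = 1.0  # metres; assumed safety distance

    def parse_frame(self, frame, boxes):
        """Return one entry per detection pair with its ground-plane distance and a danger flag."""
        results = []
        for a, b in itertools.combinations(boxes, 2):
            dx = a["depth_x"] - b["depth_x"]
            dz = a["depth_z"] - b["depth_z"]
            distance = math.sqrt(dx ** 2 + dz ** 2)
            results.append({
                "distance": distance,
                "dangerous": distance < self.THRESHOLD,
                "detection1": a,
                "detection2": b,
            })
        return results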
def vehicle(source, video_path, output, fps, frame_size):
    """Vehicle attribute recognition and license plate recognition"""
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)
            vehicle_in = device.getInputQueue("vehicle_in")
        else:
            cam_out = device.getOutputQueue("rgb")
        vehicle_nn = device.getOutputQueue("vehicle_nn")
        attr_in = device.getInputQueue("attr_in")
        attr_nn = device.getOutputQueue("attr_nn")
        license_in = device.getInputQueue("license_in")
        license_nn = device.getOutputQueue("license_nn")

        colors = ["white", "gray", "yellow", "red", "green", "blue", "black"]
        types = ["car", "bus", "truck", "van"]
        license_dict = [
            *map(chr, range(48, 58)),
            "<Anhui>", "<Beijing>", "<Chongqing>", "<Fujian>", "<Gansu>",
            "<Guangdong>", "<Guangxi>", "<Guizhou>", "<Hainan>", "<Hebei>",
            "<Heilongjiang>", "<Henan>", "<HongKong>", "<Hubei>", "<Hunan>",
            "<InnerMongolia>", "<Jiangsu>", "<Jiangxi>", "<Jilin>",
            "<Liaoning>", "<Macau>", "<Ningxia>", "<Qinghai>", "<Shaanxi>",
            "<Shandong>", "<Shanghai>", "<Shanxi>", "<Sichuan>", "<Tianjin>",
            "<Tibet>", "<Xinjiang>", "<Yunnan>", "<Zhejiang>", "<police>",
            *map(chr, range(65, 91)),
        ]

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, vehicle_in, 300, 300)
            vehicle_data = vehicle_nn.get().detections
            fps_handler.tick("vehicle")
            for bbox in vehicle_data:
                if bbox.label == 1:
                    vehicle_coord = frameNorm(
                        frame_debug, [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])
                    cv2.rectangle(frame_debug, vehicle_coord[:2],
                                  vehicle_coord[2:], (128, 128, 0))
                    vehicle_frame = frame[vehicle_coord[1]:vehicle_coord[3],
                                          vehicle_coord[0]:vehicle_coord[2]]
                    run_nn(vehicle_frame, attr_in, 72, 72)
                    attr_data = toTensorResult(attr_nn.get())
                    color_ = colors[attr_data.get("color").argmax()]
                    type_ = types[attr_data.get("type").argmax()]
                    drawText(
                        frame_debug,
                        color_,
                        (vehicle_coord[0] + 10, vehicle_coord[1] + 10),
                    )
                    drawText(
                        frame_debug,
                        type_,
                        (vehicle_coord[0] + 10, vehicle_coord[1] + 25),
                    )
                elif bbox.label == 2:
                    plate_coord = frameNorm(
                        frame_debug, [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])
                    cv2.rectangle(frame_debug, plate_coord[:2],
                                  plate_coord[2:], (128, 128, 0))
                    plate_frame = frame[plate_coord[1]:plate_coord[3],
                                        plate_coord[0]:plate_coord[2]]
                    plate_frame = pad_resize(plate_frame, (24, 94))
                    # cv2.imshow("pl", plate_frame.astype(np.uint8))
                    run_nn(plate_frame, license_in, 94, 24)
                    license_data = (toTensorResult(
                        license_nn.get()).get("d_predictions.0").squeeze())
                    plate_str = ""
                    for j in license_data:
                        if j == -1:
                            break
                        plate_str += license_dict[int(j)]
                    drawText(
                        frame_debug,
                        plate_str,
                        (plate_coord[0] - 10, plate_coord[1] - 10),
                    )
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))
            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" % time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )

        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
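# pad_resize is assumed to letterbox the plate crop to the recogniser's 24x94 input:
# scale while keeping the aspect ratio, then pad the remainder. A sketch under that
# assumption (padding colour and corner alignment are guesses, not the project's code):
import cv2
import numpy as np


def pad_resize_sketch(frame: np.ndarray, target_shape: tuple) -> np.ndarray:
    """Resize `frame` into (height, width) = target_shape without distortion, padding the rest."""
    target_h, target_w = target_shape
    h, w = frame.shape[:2]
    scale = min(target_w / w, target_h / h)
    new_w, new_h = max(int(w * scale), 1), max(int(h * scale), 1)
    resized = cv2.resize(frame, (new_w, new_h))
    canvas = np.zeros((target_h, target_w, 3), dtype=frame.dtype)
    canvas[:new_h, :new_w] = resized
    return canvas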
def yolox(source, model_name, model_size, video_path, output, fps, frame_size):
    """YOLOX-based object detector: detects objects in a video or camera stream.

    :param model_name: The name of the model to use
    :param model_size: Size of the model input
    :param video_path: Path to the video file
    :param output: The output file name
    :param fps: The FPS (frames per second) of the output video
    :param frame_size: The frame size used when saving the output video
    """
    model_w = model_size[0]
    model_h = model_size[1]
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source, model_name, model_w, model_h),
                    device_info) as device:
        print("Starting pipeline...")
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)
            yolox_det_in = device.getInputQueue("yolox_det_in")
        else:
            cam_out = device.getOutputQueue("cam_out", 1, True)
        yolox_det_nn = device.getOutputQueue("yolox_det_nn")

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            """Get the current frame from the video file or the camera."""
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            fps_handler.tick("Frame")
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(yolox_det_in, to_planar(frame, (model_h, model_w)),
                       model_w, model_h)
            yolox_det_data = yolox_det_nn.get()
            res = toTensorResult(yolox_det_data).get("output")
            fps_handler.tick("nn")
            predictions = demo_postprocess(res, (model_h, model_w), p6=False)[0]
            boxes = predictions[:, :4]
            scores = predictions[:, 4, None] * predictions[:, 5:]
            # Convert (cx, cy, w, h) to (x1, y1, x2, y2)
            boxes_xyxy = np.ones_like(boxes)
            boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
            boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
            boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
            boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
            # Undo the letterbox scaling/offset to map boxes back to the original frame
            input_shape = np.array([model_h, model_w])
            min_r = (input_shape / frame.shape[:2]).min()
            offset = (np.array(frame.shape[:2]) * min_r - input_shape) / 2
            offset = np.ravel([offset, offset])
            boxes_xyxy = (boxes_xyxy + offset[::-1]) / min_r
            dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.2)
            if dets is not None:
                final_boxes = dets[:, :4]
                final_scores, final_cls_inds = dets[:, 4], dets[:, 5]
                frame_debug = vis(
                    frame_debug,
                    final_boxes,
                    final_scores,
                    final_cls_inds,
                    conf=0.5,
                    class_names=LABELS.get(model_name),
                )
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))
            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" % time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )

        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
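# demo_postprocess, multiclass_nms and vis are the standard YOLOX demo utilities. As a
# reference for the shape of `dets` consumed above ([x1, y1, x2, y2, score, class_id]
# rows), here is a minimal class-aware NMS sketch built on cv2.dnn.NMSBoxes; it is a
# simplification for illustration, not the YOLOX implementation.
import cv2
import numpy as np


def multiclass_nms_sketch(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.2):
    """Run per-class NMS and return an array of [x1, y1, x2, y2, score, class_id], or None."""
    dets = []
    for cls_id in range(scores.shape[1]):
        cls_scores = scores[:, cls_id]
        keep = cls_scores > score_thr
        if not np.any(keep):
            continue
        cls_boxes = boxes_xyxy[keep]
        kept_scores = cls_scores[keep]
        # cv2.dnn.NMSBoxes expects [x, y, w, h] boxes
        xywh = cls_boxes.copy()
        xywh[:, 2:] -= xywh[:, :2]
        idxs = cv2.dnn.NMSBoxes(xywh.tolist(), kept_scores.tolist(), score_thr, nms_thr)
        for i in np.array(idxs).flatten():
            dets.append([*cls_boxes[i], kept_scores[i], cls_id])
    return np.array(dets) if dets else None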
            break
        nm.sendInputFrame(frame)
        fps.tick('host')

        if debug:
            show(frame)
            cv2.putText(frame, f"RGB FPS: {round(fps.tickFps('host'), 1)}",
                        (5, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
            cv2.putText(frame, f"NN FPS: {round(fps.tickFps('nn'), 1)}",
                        (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
            cv2.imshow("rgb", frame)

        key = cv2.waitKey(1)
        if key == ord('q'):
            break
except KeyboardInterrupt:
    pass

running = False
t.join()
fps.printStatus()
if not args.camera:
    cap.release()
def palm(source, video_path, output, fps, frame_size):
    """Palm detection; moves the mouse cursor with the detected palm"""
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)
            palm_in = device.getInputQueue("palm_in")
        else:
            cam_out = device.getOutputQueue("rgb")
        palm_nn = device.getOutputQueue("palm_nn")

        dots = []

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, palm_in, 128, 128)
            results = toTensorResult(palm_nn.get())
            fps_handler.tick("palm")

            num_keypoints = 7
            min_score_thresh = 0.7
            anchors = np.load("anchors_palm.npy")
            raw_box_tensor = results.get("regressors")  # regression output
            raw_score_tensor = results.get("classificators")  # classification output
            detections = raw_to_detections(raw_box_tensor, raw_score_tensor,
                                           anchors, (128, 128), num_keypoints)

            palm_coords = [
                frameNorm(frame, obj[:4]) for det in detections for obj in det
                if obj[-1] > min_score_thresh
            ]
            palm_confs = [
                obj[-1] for det in detections for obj in det
                if obj[-1] > min_score_thresh
            ]

            if len(palm_coords) > 0:
                palm_coords = non_max_suppression(
                    boxes=np.concatenate(palm_coords).reshape(-1, 4),
                    probs=palm_confs,
                    overlapThresh=0.1,
                )
                for bbox in palm_coords:
                    cv2.rectangle(frame_debug, bbox[:2], bbox[2:], (10, 245, 10))
                    dot_x = (bbox[2] + bbox[0]) / 2
                    dot_y = (bbox[3] + bbox[1]) / 2
                    dots = move_mouse(dots, (dot_x, dot_y), frame.shape[:2])

            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))
            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" % time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )

        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
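# move_mouse is assumed to smooth the palm centre over the last few detections and map it
# from frame coordinates to screen coordinates. A hedged sketch using pyautogui follows;
# the smoothing window, the linear mapping and the function name are assumptions, not the
# project's exact implementation.
import numpy as np
import pyautogui


def move_mouse_sketch(dots, new_dot, frame_shape, window=5):
    """Append the new palm centre, average the last `window` points and move the cursor."""
    dots.append(new_dot)
    dots = dots[-window:]                      # keep only a short history for smoothing
    avg_x, avg_y = np.mean(dots, axis=0)
    screen_w, screen_h = pyautogui.size()
    frame_h, frame_w = frame_shape
    pyautogui.moveTo(avg_x / frame_w * screen_w, avg_y / frame_h * screen_h)
    return dots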
def facial_info(source, video_path, output, fps, frame_size):
    """Facial information recognition (age, gender, emotion)"""
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        print("Starting pipeline...")
        # device.startPipeline()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)
            face_in = device.getInputQueue("face_in")
        else:
            cam_out = device.getOutputQueue("cam_out", 1, True)
        face_nn = device.getOutputQueue("face_nn")
        head_pose_in = device.getInputQueue("head_pose_in")
        head_pose_nn = device.getOutputQueue("head_pose_nn")
        age_in = device.getInputQueue("age_in")
        age_nn = device.getOutputQueue("age_nn")
        emo_in = device.getInputQueue("emo_in")
        emo_nn = device.getOutputQueue("emo_nn")

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        fps_handler = FPSHandler()

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(face_in, frame, 300, 300)
            face_nn_data = face_nn.get()
            fps_handler.tick("All")
            if face_nn_data is not None:
                bboxes = face_nn_data.detections
                for bbox in bboxes:
                    face_coord = frameNorm(
                        frame_debug, [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])
                    face_frame = frame[face_coord[1]:face_coord[3],
                                       face_coord[0]:face_coord[2]]
                    cv2.rectangle(
                        frame_debug,
                        (face_coord[0], face_coord[1]),
                        (face_coord[2], face_coord[3]),
                        (0, 0, 0),
                    )

                    run_nn(head_pose_in, face_frame, 60, 60)
                    roll_degree = toTensorResult(
                        head_pose_nn.get()).get("angle_r_fc")[0][0]
                    center = (
                        (face_coord[2] + face_coord[0]) / 2,
                        (face_coord[3] + face_coord[1]) / 2,
                    )
                    size = (
                        (face_coord[2] - face_coord[0]),
                        (face_coord[3] - face_coord[1]),
                    )
                    face_frame_corr = rotate_frame(
                        frame,
                        center,
                        size,
                        roll_degree,
                    )
                    cv2.imshow("face_frame_corr", face_frame_corr)

                    run_nn(age_in, face_frame_corr, 62, 62)
                    age_gender = toTensorResult(age_nn.get())
                    age = age_gender.get("age_conv3").squeeze() * 100
                    # 0 - female, 1 - male
                    gender = "Male" if age_gender.get(
                        "prob").argmax() else "Female"
                    drawText(
                        frame_debug,
                        f"Age: {age:0.0f}",
                        (face_coord[0] + 10, face_coord[1] + 30),
                        color="greenyellow",
                    )
                    drawText(
                        frame_debug,
                        f"Gender: {gender}",
                        (face_coord[0] + 10, face_coord[1] + 50),
                        "greenyellow",
                    )

                    run_nn(emo_in, face_frame_corr, 64, 64)
                    # 0 - 'neutral', 1 - 'happy', 2 - 'sad', 3 - 'surprise', 4 - 'anger'
                    emo = ["neutral", "happy", "sad", "surprise", "anger"]
                    emo = emo[toTensorResult(
                        emo_nn.get()).get("prob_emotion").argmax()]
                    drawText(
                        frame_debug,
                        f"emo: {emo}",
                        (face_coord[0] + 10, face_coord[1] + 70),
                        "greenyellow",
                    )

            if output:
                writer.write(cv2.resize(frame_debug, frame_size))
            cv2.imshow("debug", frame_debug)
            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" % time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )

        if source:
            cap.release()
        if output:
            writer.release()
        fps_handler.printStatus()
        cv2.destroyAllWindows()
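# rotate_frame is assumed to rotate the full frame around the face centre by the roll
# angle reported by the head-pose network and then crop the upright face region, so the
# age/gender and emotion models see a roll-corrected face. A sketch under that assumption
# (the function name and cropping rule are guesses, not the project's exact code):
import cv2
import numpy as np


def rotate_frame_sketch(frame: np.ndarray, center, size, angle: float) -> np.ndarray:
    """Rotate `frame` by `angle` degrees around `center` and crop a (w, h) = `size` patch."""
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(frame, matrix, (frame.shape[1], frame.shape[0]))
    w, h = int(size[0]), int(size[1])
    x = max(int(center[0] - w / 2), 0)
    y = max(int(center[1] - h / 2), 0)
    return rotated[y:y + h, x:x + w]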