def framewise_recognize(pose, pretrained_model):
    """Track the people in one frame and annotate each with an action label.

    Args:
        pose: Sequence describing one frame. Index 0 is the frame image,
            index 2 the person bounding boxes, index 3 one x coordinate per
            person (used to match people to tracks) and the last element
            the flattened normalised joints, 36 values per person.
        pretrained_model: Classifier with a ``predict`` method; the arg-max
            of its output is converted to a label through ``Actions``.

    Returns:
        The frame from ``pose[0]``, drawn on in place (track IDs, action
        labels, fall warning and track boxes). It is returned untouched
        when ``pose[2]`` is empty.
    """
    frame, bboxes, xcenter = pose[0], pose[2], pose[3]
    if not bboxes:
        return frame

    joints_norm_per_frame = np.array(pose[-1])
    bboxes = np.array(bboxes)
    features = encoder(frame, bboxes)

    # Every detection is given a fixed confidence score of 1.0.
    detections = [
        Detection(bbox, 1.0, feature)
        for bbox, feature in zip(bboxes, features)
    ]

    # Non-maximum suppression over the detected boxes.
    boxes = np.array([det.tlwh for det in detections])
    scores = np.array([det.confidence for det in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Advance the module-level tracker with this frame's detections.
    tracker.predict()
    tracker.update(detections)

    # Collect confirmed, recently updated tracks and draw their IDs.
    trk_result = []
    for trk in tracker.tracks:
        if not trk.is_confirmed() or trk.time_since_update > 1:
            continue
        bbox = trk.to_tlwh()
        trk_result.append([bbox[0], bbox[1], bbox[2], bbox[3], trk.track_id])
        cv.putText(frame, 'ID-' + str(trk.track_id),
                   (int(bbox[0]), int(bbox[1] - 45)),
                   cv.FONT_HERSHEY_SIMPLEX, 0.8, trk_clr, 3)

    for d in trk_result:
        xmin = int(d[0])
        ymin = int(d[1])
        xmax = int(d[2]) + xmin
        ymax = int(d[3]) + ymin

        # Pick the person whose x coordinate is closest to the box centre.
        centre_x = (xmax + xmin) / 2.
        try:
            j = np.argmin(np.array([abs(x - centre_x) for x in xcenter]))
        except (TypeError, ValueError):
            # argmin raises ValueError on an empty list; a missing or
            # non-numeric xcenter raises TypeError. Fall back to person 0.
            j = 0

        if joints_norm_per_frame.size > 0:
            # One row of 36 values: the joints of the matched person.
            joints_norm_single_person = np.array(
                joints_norm_per_frame[j * 36:(j + 1) * 36]).reshape(-1, 36)
            pred = np.argmax(pretrained_model.predict(joints_norm_single_person))
            init_label = Actions(pred).name
            cv.putText(frame, init_label, (xmin + 80, ymin - 45),
                       cv.FONT_HERSHEY_SIMPLEX, 1, trk_clr, 3)
            if init_label == 'fall_down':
                cv.putText(frame, 'WARNING: someone is falling down!',
                           (20, 60), cv.FONT_HERSHEY_SIMPLEX, 1.5,
                           (0, 0, 255), 4)

        cv.rectangle(frame, (xmin - 10, ymin - 30), (xmax + 10, ymax),
                     trk_clr, 2)
    return frame
def get_pose_dict(pose, cur_pose, frame_count, cur_box):
    """Record the current box, and periodically the pose, of every track.

    Args:
        pose: Sequence describing one frame. Index 0 is the frame image,
            index 2 the person bounding boxes, index 3 one x coordinate per
            person (the neck joint, per the original author's note) and the
            last element the flattened normalised joints, 36 per person.
        cur_pose: Dict updated in place. On frames where
            ``frame_count % 5 == 0`` each track ID maps to
            ``[frame_count, joint_0, ..., joint_35]``.
        frame_count: Index of the current frame.
        cur_box: Dict that is cleared on entry and then filled with
            ``track_id -> [xmin, ymin, xmax, ymax]``.

    Returns:
        Always ``1``.
    """
    cur_box.clear()
    frame, bboxes, xcenter = pose[0], pose[2], pose[3]
    if not bboxes:
        return 1

    # All joint coordinates of the frame (relative coordinates).
    joints_norm_per_frame = np.array(pose[-1])
    bboxes = np.array(bboxes)
    features = encoder(frame, bboxes)

    # Every detection is given a fixed confidence score of 1.0.
    detections = [
        Detection(bbox, 1.0, feature)
        for bbox, feature in zip(bboxes, features)
    ]

    # Non-maximum suppression.
    boxes = np.array([det.tlwh for det in detections])
    scores = np.array([det.confidence for det in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Advance the module-level tracker with this frame's detections.
    tracker.predict()
    tracker.update(detections)

    # Keep confirmed, recently updated tracks: box plus track ID.
    trk_result = []
    for trk in tracker.tracks:
        if not trk.is_confirmed() or trk.time_since_update > 1:
            continue
        bbox = trk.to_tlwh()
        trk_result.append([bbox[0], bbox[1], bbox[2], bbox[3], trk.track_id])

    has_joints = joints_norm_per_frame.size > 0
    store_pose = frame_count % 5 == 0

    for d in trk_result:
        xmin = int(d[0])
        ymin = int(d[1])
        xmax = int(d[2]) + xmin
        ymax = int(d[3]) + ymin
        track_id = d[4]

        # Match the track to a person by the distance between the box
        # centre and each person's x coordinate.
        centre_x = (xmax + xmin) / 2.
        try:
            j = np.argmin(np.array([abs(x - centre_x) for x in xcenter]))
        except (TypeError, ValueError):
            # No person in this frame: default to index 0 (not meaningful).
            j = 0

        if not has_joints:
            continue
        if store_pose:
            cur_pose[track_id] = [frame_count] + list(
                joints_norm_per_frame[j * 36:(j + 1) * 36])
        cur_box[track_id] = [xmin, ymin, xmax, ymax]
    return 1
def framewise_recognize(pose, pretrained_model):
    """Track the people in one frame and draw their IDs and boxes.

    This variant performs no action classification: ``pretrained_model`` is
    accepted only to keep the signature shared with the other variants.

    Args:
        pose: Sequence describing one frame. Index 0 is the frame image and
            index 2 the person bounding boxes.
        pretrained_model: Unused.

    Returns:
        The frame from ``pose[0]``, drawn on in place. It is returned
        untouched when ``pose[2]`` is empty.
    """
    frame, bboxes = pose[0], pose[2]
    if not bboxes:
        return frame

    bboxes = np.array(bboxes)
    features = encoder(frame, bboxes)

    # Every detection is given a fixed confidence score of 1.0.
    detections = [
        Detection(bbox, 1.0, feature)
        for bbox, feature in zip(bboxes, features)
    ]

    # Non-maximum suppression.
    boxes = np.array([det.tlwh for det in detections])
    scores = np.array([det.confidence for det in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Advance the module-level tracker with this frame's detections.
    tracker.predict()
    tracker.update(detections)

    # First pass: label every confirmed, recently updated track with its ID.
    track_boxes = []
    for trk in tracker.tracks:
        if not trk.is_confirmed() or trk.time_since_update > 1:
            continue
        bbox = trk.to_tlwh()
        track_boxes.append(bbox)
        cv.putText(frame, 'ID-' + str(trk.track_id),
                   (int(bbox[0]), int(bbox[1] - 45)),
                   cv.FONT_HERSHEY_SIMPLEX, 0.8, trk_clr, 3)

    # Second pass: draw the boxes. The person/track matching that used to
    # sit here computed an index that was never read, so it was removed.
    for bbox in track_boxes:
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2]) + xmin
        ymax = int(bbox[3]) + ymin
        cv.rectangle(frame, (xmin - 10, ymin - 30), (xmax + 10, ymax),
                     trk_clr, 2)
    return frame
def framewise_recognize(pose, pretrained_model):
    """Track the people in one frame and annotate each with an action label.

    Args:
        pose: Sequence describing one frame. Index 0 is the frame image,
            index 2 the person bounding boxes, index 3 one x coordinate per
            person (the neck joint, per the original author's note) and the
            last element the flattened normalised joints, 36 per person.
        pretrained_model: Classifier with a ``predict`` method; the arg-max
            of its output is converted to a label through ``Actions``.

    Returns:
        The frame from ``pose[0]``, drawn on in place. It is returned
        untouched when ``pose[2]`` is empty.
    """
    frame, bboxes, xcenter = pose[0], pose[2], pose[3]
    if not bboxes:
        return frame

    joints_norm_per_frame = np.array(pose[-1])
    bboxes = np.array(bboxes)
    features = encoder(frame, bboxes)

    # Every detection is given a fixed confidence score of 1.0.
    detections = [
        Detection(bbox, 1.0, feature)
        for bbox, feature in zip(bboxes, features)
    ]

    # Non-maximum suppression.
    boxes = np.array([det.tlwh for det in detections])
    scores = np.array([det.confidence for det in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Advance the existing tracker with this frame's detections.
    tracker.predict()
    tracker.update(detections)

    # Record the track results (bounding box and ID) and show the track ID.
    trk_result = []
    for trk in tracker.tracks:
        if not trk.is_confirmed() or trk.time_since_update > 1:
            continue
        bbox = trk.to_tlwh()
        trk_result.append([bbox[0], bbox[1], bbox[2], bbox[3], trk.track_id])
        cv.putText(frame, 'ID-' + str(trk.track_id),
                   (int(bbox[0]), int(bbox[1] - 45)),
                   cv.FONT_HERSHEY_SIMPLEX, 0.8, trk_clr, 3)

    for d in trk_result:
        xmin = int(d[0])
        ymin = int(d[1])
        xmax = int(d[2]) + xmin
        ymax = int(d[3]) + ymin

        # Match the track to a person by the distance between the box
        # centre and each person's x coordinate.
        centre_x = (xmax + xmin) / 2.
        try:
            j = np.argmin(np.array([abs(x - centre_x) for x in xcenter]))
        except (TypeError, ValueError):
            # No person in the current frame: default to 0 (not meaningful).
            j = 0

        # Classify the action of the matched person.
        if joints_norm_per_frame.size > 0:
            joints_norm_single_person = np.array(
                joints_norm_per_frame[j * 36:(j + 1) * 36]).reshape(-1, 36)
            pred = np.argmax(pretrained_model.predict(joints_norm_single_person))
            init_label = Actions(pred).name
            # Show the action category.
            cv.putText(frame, init_label, (xmin + 80, ymin - 45),
                       cv.FONT_HERSHEY_SIMPLEX, 1, trk_clr, 3)
            # Abnormal-event warning.
            if init_label == 'fall_down':
                cv.putText(frame, 'WARNING: someone is falling down!',
                           (20, 60), cv.FONT_HERSHEY_SIMPLEX, 1.5,
                           (0, 0, 255), 4)

        # Draw the track box.
        cv.rectangle(frame, (xmin - 10, ymin - 30), (xmax + 10, ymax),
                     trk_clr, 2)
    return frame
def framewise_recognize(pose, pretrained_model):
    """Track people in one frame and classify actions from a joint history.

    Each tracked person's joints are handed to ``lstm_pred.update_ctx``;
    a prediction is made only when that call reports a context is available.

    Args:
        pose: Sequence describing one frame. Index 0 is the frame image,
            index 2 the person bounding boxes, index 3 one x coordinate per
            person (the neck joint, per the original author's note) and the
            last element the flattened normalised joints, 36 per person.
        pretrained_model: Model with a ``predict`` method that is fed an
            array reshaped to ``(-1, 4, 36)``.

    Returns:
        The frame from ``pose[0]``, drawn on in place. It is returned
        untouched when ``pose[2]`` is empty.
    """
    frame, bboxes, xcenter = pose[0], pose[2], pose[3]
    if not bboxes:
        return frame

    joints_norm_per_frame = np.array(pose[-1])
    bboxes = np.array(bboxes)
    features = encoder(frame, bboxes)

    # Every detection is given a fixed confidence score of 1.0.
    detections = [
        Detection(bbox, 1.0, feature)
        for bbox, feature in zip(bboxes, features)
    ]

    # Non-maximum suppression.
    boxes = np.array([det.tlwh for det in detections])
    scores = np.array([det.confidence for det in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Advance the module-level tracker with this frame's detections.
    tracker.predict()
    tracker.update(detections)

    # Record the track results (bounding box and ID) and label the track ID.
    trk_result = []
    for trk in tracker.tracks:
        if not trk.is_confirmed() or trk.time_since_update > 1:
            continue
        bbox = trk.to_tlwh()
        trk_result.append([bbox[0], bbox[1], bbox[2], bbox[3], trk.track_id])
        cv.putText(frame, 'ID-' + str(trk.track_id),
                   (int(bbox[0]), int(bbox[1] - 45)),
                   cv.FONT_HERSHEY_SIMPLEX, 0.8, trk_clr, 3)

    for d in trk_result:
        xmin = int(d[0])
        ymin = int(d[1])
        xmax = int(d[2]) + xmin
        ymax = int(d[3]) + ymin
        track_id = int(d[4])  # named track_id so the builtin id() is not shadowed

        # Match the track to a person by the distance between the box
        # centre and each person's x coordinate.
        centre_x = (xmax + xmin) / 2.
        try:
            j = np.argmin(np.array([abs(x - centre_x) for x in xcenter]))
        except (TypeError, ValueError):
            # No humans in this frame: default to index 0.
            j = 0

        # Classification.
        if joints_norm_per_frame.size > 0:
            joints_norm_single_person = joints_norm_per_frame[j * 36:(j + 1) * 36]
            # NOTE(review): update_ctx presumably appends these joints to the
            # track's history and returns (ready, history) - confirm.
            exists, ret = lstm_pred.update_ctx(track_id, joints_norm_single_person)
            if exists:
                print(f"predicting for {track_id}")
                ret = np.array(ret).reshape(-1, 4, 36)
                pred = np.argmax(pretrained_model.predict(ret))
                print(pred)
                init_label = Actions(pred).name
                # Show the action category.
                cv.putText(frame, init_label, (xmin + 80, ymin - 45),
                           cv.FONT_HERSHEY_SIMPLEX, 1, trk_clr, 3)
                # Abnormal-event warning.
                if init_label == 'fall_down':
                    cv.putText(frame, 'WARNING: someone is falling down!',
                               (20, 60), cv.FONT_HERSHEY_SIMPLEX, 1.5,
                               (0, 0, 255), 4)

        # Draw the track box.
        cv.rectangle(frame, (xmin - 10, ymin - 30), (xmax + 10, ymax),
                     trk_clr, 2)
    return frame
def framewise_recognize(pose, pretrained_model, camera):
    """Track people, classify their actions and take a photo after a wave.

    When a tracked person is classified as ``'wave'`` a capture is armed;
    the first classification pass more than 1.5 s later reads a fresh frame
    from ``camera`` and writes it to ``img/``.

    Module-level state read and written: ``last_init_label``,
    ``capture_picture_count``, ``start_time``, ``fir``, ``fir_ID`` and
    ``getSignal`` (``fir`` and ``getSignal`` hold the strings ``'true'`` /
    ``'false'``).

    Args:
        pose: Sequence describing one frame. Index 0 is the frame image,
            index 2 the person bounding boxes, index 3 one x coordinate per
            person (the neck joint, per the original author's note) and the
            last element the flattened normalised joints, 36 per person.
        pretrained_model: Classifier with a ``predict`` method; the arg-max
            of its output is converted to a label through ``Actions``.
        camera: Object whose ``read()`` returns ``(ok, image)``.

    Returns:
        The annotated frame. After a successful capture this is the freshly
        read camera image rather than ``pose[0]``.
    """
    global last_init_label, capture_picture_count, start_time, fir, fir_ID, getSignal

    frame, bboxes, xcenter = pose[0], pose[2], pose[3]
    if not bboxes:
        return frame

    joints_norm_per_frame = np.array(pose[-1])
    bboxes = np.array(bboxes)
    features = encoder(frame, bboxes)

    # Every detection is given a fixed confidence score of 1.0.
    detections = [
        Detection(bbox, 1.0, feature)
        for bbox, feature in zip(bboxes, features)
    ]

    # Non-maximum suppression: keep the highest-scoring boxes.
    boxes = np.array([det.tlwh for det in detections])
    scores = np.array([det.confidence for det in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Advance the module-level tracker with this frame's detections.
    tracker.predict()
    tracker.update(detections)

    # Record the track results (bounding box and ID) and label the track ID.
    trk_result = []
    for trk in tracker.tracks:
        if not trk.is_confirmed() or trk.time_since_update > 1:
            continue
        bbox = trk.to_tlwh()
        trk_result.append([bbox[0], bbox[1], bbox[2], bbox[3], trk.track_id])
        cv.putText(frame, 'ID-' + str(trk.track_id),
                   (int(bbox[0]), int(bbox[1] - 45)),
                   cv.FONT_HERSHEY_SIMPLEX, 0.8, trk_clr, 3)

        # Remember the ID of the first person that entered the view.
        if fir == 'true':
            fir = 'false'
            fir_ID = trk.track_id
            print("First person ID:{}".format(fir_ID))

    for d in trk_result:
        xmin = int(d[0])
        ymin = int(d[1])
        xmax = int(d[2]) + xmin
        ymax = int(d[3]) + ymin

        # Match the track to a person by the distance between the box
        # centre and each person's x coordinate.
        centre_x = (xmax + xmin) / 2.
        try:
            j = np.argmin(np.array([abs(x - centre_x) for x in xcenter]))
        except (TypeError, ValueError):
            # No person in this frame: default to index 0 (not meaningful).
            j = 0

        # Classify the action of the matched person.
        if joints_norm_per_frame.size > 0:
            joints_norm_single_person = np.array(
                joints_norm_per_frame[j * 36:(j + 1) * 36]).reshape(-1, 36)
            pred = np.argmax(pretrained_model.predict(joints_norm_single_person))
            init_label = Actions(pred).name
            # Show the action category.
            cv.putText(frame, init_label, (xmin + 80, ymin - 45),
                       cv.FONT_HERSHEY_SIMPLEX, 1, trk_clr, 3)

            # A wave arms the capture; the photo is taken 1.5 s later.
            if init_label == 'wave' and getSignal == 'false':
                getSignal = 'true'
                print("Prepare to get image\n")
                start_time = time.time()

            if getSignal == 'true' and time.time() - start_time > 1.5:
                getSignal = 'false'
                ret, snapshot = camera.read()
                if ret:
                    # From here on the fresh image replaces the frame.
                    frame = snapshot
                    filename = "img/getPicutre-{0}.jpg".format(
                        capture_picture_count)
                    cv.imwrite(filename, frame)
                    print("Image saved at {}\n".format(filename))
                    capture_picture_count = capture_picture_count + 1
                else:
                    # A failed read yields no image; keep the current frame
                    # instead of passing None to imwrite / rectangle.
                    print("Camera read failed, capture skipped\n")

            # TODO: change camera parameters (e.g. gain) on a dedicated
            # gesture from the first person; not implemented yet.
            last_init_label = init_label

        # Draw the track box.
        cv.rectangle(frame, (xmin - 10, ymin - 30), (xmax + 10, ymax),
                     trk_clr, 2)
    return frame
def show_camera(self):
    """Read one camera frame, process it for the active mode and show it.

    ``self.__flag_mode`` selects the processing:
        1 - pose estimation only,
        2 - multi-person tracking,
        3 - sign language recognition per tracked person.

    The frame is skipped entirely when the camera delivers no image.
    """
    start = time.time()
    ret, frame = self.cap.read()
    if not ret:
        # No image was grabbed; converting a missing frame would raise.
        return
    show = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    def run_tracker(person_boxes, verbose):
        """Update the tracker, draw track IDs and return [x, y, w, h, id] rows."""
        person_boxes = np.array(person_boxes)
        features = encoder(frame, person_boxes)
        # Every detection is given a fixed confidence score of 1.0.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(person_boxes, features)
        ]
        # Non-maximum suppression.
        boxes = np.array([det.tlwh for det in detections])
        scores = np.array([det.confidence for det in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        # Call the tracker and update it in real time.
        tracker.predict()
        tracker.update(detections)
        # Record track results, including bounding boxes and their IDs.
        rows = []
        for trk in tracker.tracks:
            if not trk.is_confirmed() or trk.time_since_update > 1:
                continue
            bbox = trk.to_tlwh()
            if verbose:
                print('bbox: ', bbox)
            rows.append([bbox[0], bbox[1], bbox[2], bbox[3], trk.track_id])
            # Mark the track ID on the image.
            trk_id = 'ID-' + str(trk.track_id)
            if verbose:
                print('track ID: ', trk_id)
            cv2.putText(show, trk_id, (int(bbox[0]), int(bbox[1] - 45)),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)
        return rows

    def draw_track_box(xmin, ymin, xmax, ymax, label):
        """Draw a track box in the colour assigned to its label."""
        colour = (int(settings.c[label % 32, 0]),
                  int(settings.c[label % 32, 1]),
                  int(settings.c[label % 32, 2]))
        cv2.rectangle(show, (xmin, ymin), (xmax, ymax), colour, 4)

    if self.__flag_mode == 1:
        self.infoBox.setText(u'Pose estimation')
        humans = my_detector.detect(show)
        my_detector.draw(show, humans)

    elif self.__flag_mode == 2:
        self.infoBox.setText(u'Multiplayer tracking')
        humans = my_detector.detect(show)
        my_detector.draw(show, humans)
        _, _, bboxes = SkeletonDetector.humans_to_skelsInfo(
            humans, joint_choose)
        if bboxes:
            for d in run_tracker(bboxes, verbose=True):
                xmin = int(d[0])
                ymin = int(d[1])
                draw_track_box(xmin, ymin, int(d[2]) + xmin,
                               int(d[3]) + ymin, int(d[4]))

    elif self.__flag_mode == 3:
        self.infoBox.setText(u'Sign language recognition')
        humans = my_detector.detect(show)
        my_detector.draw(show, humans)
        _, skelsInfo_choose, bboxes = SkeletonDetector.humans_to_skelsInfo(
            humans, joint_choose)
        if bboxes:
            visible_labels = set()
            for d in run_tracker(bboxes, verbose=False):
                xmin = int(d[0])
                ymin = int(d[1])
                xmax = int(d[2]) + xmin
                ymax = int(d[3]) + ymin
                label = int(d[4])
                visible_labels.add(label)
                draw_track_box(xmin, ymin, xmax, ymax, label)

                # Match the track to a person by the distance between the
                # box centre and each person's x coordinate.
                # NOTE(review): xcenter is never assigned in this method.
                # Unless it exists at module level this lookup raises
                # NameError and j is always 0, i.e. every track is given the
                # first skeleton. The fallback is kept so behaviour does not
                # change; the matching itself still needs a real fix.
                centre_x = (xmax + xmin) / 2.
                try:
                    j = np.argmin(
                        np.array([abs(x - centre_x) for x in xcenter]))
                except (NameError, TypeError, ValueError):
                    # No usable person centres: default to index 0.
                    j = 0

                if not skelsInfo_choose:
                    continue
                skeleton_choose = skelsInfo_choose[j]
                if len(skeleton_choose) != 20:
                    continue

                prediced_label, acc, vni_prediced_label = classifier.predict(
                    skeleton_choose)
                if acc > 0.94:
                    if label not in self.dict:
                        # First confident sign seen for this track.
                        self.dict[label] = vni_prediced_label
                        self.dict_check[label] = vni_prediced_label
                    elif vni_prediced_label != self.dict_check[label]:
                        # Append only when the sign differs from the last one.
                        self.dict[label] = (self.dict[label] + ' ' +
                                            vni_prediced_label)
                        self.dict_check[label] = vni_prediced_label
                cv2.putText(show, prediced_label, (xmin, ymin - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 0, 0), 4,
                            cv2.LINE_AA)

            # Forget tracks that are no longer on screen.
            for i in list(self.dict_check):
                if i not in visible_labels:
                    del self.dict_check[i]
                    del self.dict[i]

            self.text = ''.join(
                'ID-' + str(i) + ': ' + self.dict[i] + '\n'
                for i in self.dict)
            self.textBox.setText(self.text)

    elapsed = time.time() - start
    # Guard against a zero interval on coarse clocks.
    self.fps = 1. / elapsed if elapsed > 0 else 0.0
    cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    showImage = QtGui.QImage(show, show.shape[1], show.shape[0],
                             QtGui.QImage.Format_RGB888)
    self.label_show_camera.setPixmap(QtGui.QPixmap.fromImage(showImage))
def framewise_recognize(pose, pretrained_model):
    """Track the people in one frame and annotate each with an action label.

    Args:
        pose: Sequence describing one frame. Index 0 is the frame image,
            index 2 the person bounding boxes, index 3 one x coordinate per
            person (the neck joint, per the original author's note) and the
            last element the flattened normalised joints, 36 per person.
        pretrained_model: Classifier with a ``predict`` method that is fed
            one row of 36 joint values; the arg-max of its output is
            converted to a label through ``Actions``.

    Returns:
        The frame from ``pose[0]``, drawn on in place. It is returned
        untouched when ``pose[2]`` is empty.
    """
    frame, bboxes, xcenter = pose[0], pose[2], pose[3]
    if not bboxes:
        return frame

    # All joint coordinates of the frame (relative coordinates).
    joints_norm_per_frame = np.array(pose[-1])
    bboxes = np.array(bboxes)
    features = encoder(frame, bboxes)

    # Every detection is given a fixed confidence score of 1.0.
    detections = [
        Detection(bbox, 1.0, feature)
        for bbox, feature in zip(bboxes, features)
    ]

    # Non-maximum suppression; detection first, tracking afterwards.
    boxes = np.array([det.tlwh for det in detections])
    scores = np.array([det.confidence for det in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Advance the module-level tracker with this frame's detections.
    tracker.predict()
    tracker.update(detections)

    # Record the track results and label the track ID. Each row is
    # [top-left x, top-left y, width, height, track_id].
    trk_result = []
    for trk in tracker.tracks:
        if not trk.is_confirmed() or trk.time_since_update > 1:
            continue
        bbox = trk.to_tlwh()
        trk_result.append([bbox[0], bbox[1], bbox[2], bbox[3], trk.track_id])
        cv.putText(frame, 'ID-' + str(trk.track_id),
                   (int(bbox[0]), int(bbox[1] - 45)),
                   cv.FONT_HERSHEY_SIMPLEX, 0.8, trk_clr, 3)

    for d in trk_result:
        xmin = int(d[0])
        ymin = int(d[1])
        xmax = int(d[2]) + xmin
        ymax = int(d[3]) + ymin

        # Match the track to a person by the distance between the box
        # centre and each person's x coordinate.
        centre_x = (xmax + xmin) / 2.
        try:
            j = np.argmin(np.array([abs(x - centre_x) for x in xcenter]))
        except (TypeError, ValueError):
            # No person in this frame: default to index 0 (not meaningful).
            j = 0

        # Classify the action of the matched person.
        if joints_norm_per_frame.size > 0:
            joints_norm_single_person = np.array(
                joints_norm_per_frame[j * 36:(j + 1) * 36]).reshape(-1, 36)
            pred = np.argmax(pretrained_model.predict(joints_norm_single_person))
            init_label = Actions(pred).name
            # Show the action category.
            cv.putText(frame, init_label, (xmin + 80, ymin - 45),
                       cv.FONT_HERSHEY_SIMPLEX, 1, trk_clr, 3)
            # Abnormal-event warning.
            if init_label == 'fall_down':
                cv.putText(frame, 'WARNING: someone is falling down!',
                           (20, 60), cv.FONT_HERSHEY_SIMPLEX, 1.5,
                           (0, 0, 255), 4)

        # Draw the track box.
        cv.rectangle(frame, (xmin - 10, ymin - 30), (xmax + 10, ymax),
                     trk_clr, 2)
    return frame
def framewise_recognize(pose, pretrained_model):
    """Track people in one frame and raise a debounced fall warning.

    Only the fall warning is drawn; track IDs, action labels and track boxes
    are not rendered by this variant.

    The module-level counter ``fall_num`` is incremented (capped at 2) for
    every ``'fall_down'`` classification and reset to 0 by any other label;
    the warning is drawn while the counter is 2.
    NOTE(review): the counter is shared by all tracks, so with several
    people in view it counts classifications, not consecutive frames of one
    person - confirm whether that is intended.

    Args:
        pose: Sequence describing one frame. Index 0 is the frame image
            (already annotated with skeleton points, per the original
            author's note), index 2 the person bounding boxes, index 3 one
            x coordinate per person (the neck joint, per the same notes)
            and the last element the flattened normalised joints, 36 values
            (18 joints) per person.
        pretrained_model: Classifier with a ``predict`` method; the arg-max
            of its output is converted to a label through ``Actions``.

    Returns:
        The frame from ``pose[0]``, drawn on in place. It is returned
        untouched when ``pose[2]`` is empty.
    """
    global fall_num

    frame, bboxes, xcenter = pose[0], pose[2], pose[3]
    if not bboxes:
        return frame

    joints_norm_per_frame = np.array(pose[-1])

    # Tracking part: encode every box as an appearance feature.
    bboxes = np.array(bboxes)
    features = encoder(frame, bboxes)

    # Pair each box with its feature; confidence is fixed at 1.0.
    detections = [
        Detection(bbox, 1.0, feature)
        for bbox, feature in zip(bboxes, features)
    ]

    # Non-maximum suppression selects the final target boxes.
    boxes = np.array([det.tlwh for det in detections])
    scores = np.array([det.confidence for det in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Predict the existing tracks forward, then update with the detections.
    tracker.predict()
    tracker.update(detections)

    # Boxes (top-left x, top-left y, width, height) of the confirmed tracks
    # that were updated recently.
    track_boxes = [
        trk.to_tlwh()
        for trk in tracker.tracks
        if trk.is_confirmed() and trk.time_since_update <= 1
    ]

    for bbox in track_boxes:
        xmin = int(bbox[0])
        xmax = int(bbox[2]) + xmin

        # Match the track to a person by the distance between the box
        # centre and each person's x coordinate.
        centre_x = (xmax + xmin) / 2.
        try:
            j = np.argmin(np.array([abs(x - centre_x) for x in xcenter]))
        except (TypeError, ValueError):
            # No person in this frame: default to index 0 (not meaningful).
            j = 0

        # Classify the action of the matched person.
        if joints_norm_per_frame.size > 0:
            joints_norm_single_person = np.array(
                joints_norm_per_frame[j * 36:(j + 1) * 36]).reshape(-1, 36)
            pred = np.argmax(pretrained_model.predict(joints_norm_single_person))
            init_label = Actions(pred).name  # name of the enum member

            # Abnormal-event warning, shown once the counter reaches 2.
            if init_label == 'fall_down':
                fall_num = min(fall_num + 1, 2)
                if fall_num == 2:
                    cv.putText(frame, 'WARNING: someone is Falling down!',
                               (20, 60), cv.FONT_HERSHEY_SIMPLEX, 1,
                               (0, 0, 255), 3)
            else:
                fall_num = 0
            print('fall_num:{}'.format(fall_num))
    return frame