def update(self):
    final_result = []
    norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
    hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
    if self.save_video:
        # initialize the file video stream, adapt output video resolution to original video
        stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
        if not stream.isOpened():
            print("Try to use other video encoders...")
            ext = self.video_save_opt['savepath'].split('.')[-1]
            fourcc, _ext = self.recognize_video_ext(ext)
            self.video_save_opt['fourcc'] = fourcc
            self.video_save_opt['savepath'] = self.video_save_opt['savepath'][:-4] + _ext
            stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            assert stream.isOpened(), 'Cannot open video for writing'
    # keep looping infinitely
    while True:
        # ensure the queue is not empty and get item
        (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue)
        if orig_img is None:
            # if the thread indicator variable is set (img is None), stop the thread
            if self.save_video:
                stream.release()
            write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval)
            print("Results have been written to json.")
            return
        # image channel RGB->BGR
        orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
        if boxes is None or len(boxes) == 0:
            if self.opt.save_img or self.save_video or self.opt.vis:
                self.write_image(orig_img, im_name, stream=stream if self.save_video else None)
        else:
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)
            assert hm_data.dim() == 4
            face_hand_num = 110
            if hm_data.size()[1] == 136:
                self.eval_joints = [*range(0, 136)]
            elif hm_data.size()[1] == 26:
                self.eval_joints = [*range(0, 26)]
            elif hm_data.size()[1] == 133:
                self.eval_joints = [*range(0, 133)]
            elif hm_data.size()[1] == 68:
                face_hand_num = 42
                self.eval_joints = [*range(0, 68)]
            elif hm_data.size()[1] == 21:
                self.eval_joints = [*range(0, 21)]
            pose_coords = []
            pose_scores = []
            for i in range(hm_data.shape[0]):
                bbox = cropped_boxes[i].tolist()
                if isinstance(self.heatmap_to_coord, list):
                    pose_coords_body_foot, pose_scores_body_foot = self.heatmap_to_coord[0](
                        hm_data[i][self.eval_joints[:-face_hand_num]], bbox, hm_shape=hm_size, norm_type=norm_type)
                    pose_coords_face_hand, pose_scores_face_hand = self.heatmap_to_coord[1](
                        hm_data[i][self.eval_joints[-face_hand_num:]], bbox, hm_shape=hm_size, norm_type=norm_type)
                    pose_coord = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0)
                    pose_score = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0)
                else:
                    pose_coord, pose_score = self.heatmap_to_coord(
                        hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
            preds_img = torch.cat(pose_coords)
            preds_scores = torch.cat(pose_scores)
            if not self.opt.pose_track:
                boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                    pose_nms(boxes, scores, ids, preds_img, preds_scores,
                             self.opt.min_box_area, use_heatmap_loss=self.use_heatmap_loss)
            _result = []
            for k in range(len(scores)):
                _result.append({
                    'keypoints': preds_img[k],
                    'kp_score': preds_scores[k],
                    'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                    'idx': ids[k],
                    'box': [boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1]]
                })
            result = {'imgname': im_name, 'result': _result}
            if self.opt.pose_flow:
                poseflow_result = self.pose_flow_wrapper.step(orig_img, result)
                for i in range(len(poseflow_result)):
                    result['result'][i]['idx'] = poseflow_result[i]['idx']
            final_result.append(result)
            if self.opt.save_img or self.save_video or self.opt.vis:
                if hm_data.size()[1] == 49:
                    from alphapose.utils.vis import vis_frame_dense as vis_frame
                elif self.opt.vis_fast:
                    from alphapose.utils.vis import vis_frame_fast as vis_frame
                else:
                    from alphapose.utils.vis import vis_frame
                img = vis_frame(orig_img, result, self.opt, self.vis_thres)
                self.write_image(img, im_name, stream=stream if self.save_video else None)
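# ---------------------------------------------------------------------------
# A minimal sketch of the contract the `heatmap_to_coord` calls above rely on
# (an illustrative assumption, not AlphaPose's actual implementation, which
# adds sub-pixel refinement and norm_type handling): take the per-joint argmax
# over the heatmap and map it back into the cropped bbox in image space.
import numpy as np

def heatmap_to_coord_sketch(hms, bbox):
    """hms: (K, H, W) heatmaps for one person; bbox: [x1, y1, x2, y2]."""
    num_joints, hm_h, hm_w = hms.shape
    coords = np.zeros((num_joints, 2), dtype=np.float32)
    scores = np.zeros((num_joints, 1), dtype=np.float32)
    x1, y1, x2, y2 = bbox
    for k in range(num_joints):
        py, px = np.unravel_index(np.argmax(hms[k]), (hm_h, hm_w))
        coords[k] = (x1 + (px / hm_w) * (x2 - x1),   # scale heatmap x into bbox
                     y1 + (py / hm_h) * (y2 - y1))   # scale heatmap y into bbox
        scores[k, 0] = hms[k, py, px]                # peak value as confidence
    return coords, scores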
def update(self):
    final_result = []
    norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
    hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
    if self.save_video:
        # initialize the file video stream, adapt output video resolution to original video
        stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
        if not stream.isOpened():
            print("Try to use other video encoders...")
            ext = self.video_save_opt['savepath'].split('.')[-1]
            fourcc, _ext = self.recognize_video_ext(ext)
            self.video_save_opt['fourcc'] = fourcc
            self.video_save_opt['savepath'] = self.video_save_opt['savepath'][:-4] + _ext
            stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            assert stream.isOpened(), 'Cannot open video for writing'

    # ====== head pose estimation setup ======
    if self.head_pose:
        # create the head pose estimator
        pose_estimator = PoseEstimator(img_size=self.opt.img_size)
        # Introduce scalar stabilizers for pose.
        pose_stabilizers = [Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1)
                            for _ in range(6)]
        masks_list = []            # head keypoint list
        emoji_available_list = []  # indices of targets eligible for emotion recognition
        face_naked_rate = []       # face exposure rate of every person

    # keep looping infinitely
    while True:
        if self.opt.tracking:
            # update the re-identification state
            reid_states = self.reid_states
            reid_global_states = self.reid_global_states
            reid_global_states["frame"] = (reid_global_states["frame"] + 1) % 9999
            current_time = time.time()
            reid_global_states["interval"] = current_time - reid_global_states['time']
            reid_global_states['time'] = current_time
        # ensure the queue is not empty and get item
        (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue)
        if orig_img is None:
            # if the thread indicator variable is set (img is None), stop the thread
            if self.save_video:
                stream.release()
            write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval)
            print("Results have been written to json.")
            return
        # ====== further processing ======
        # image channel RGB->BGR
        orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
        if boxes is None or len(boxes) == 0:
            if self.opt.save_img or self.save_video or self.opt.vis:
                self.write_image(orig_img, im_name, stream=stream if self.save_video else None)
        else:
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)
            assert hm_data.dim() == 4
            # pred = hm_data.cpu().data.numpy()
            if hm_data.size()[1] == 136:
                self.eval_joints = [*range(0, 136)]
            elif hm_data.size()[1] == 26:
                self.eval_joints = [*range(0, 26)]
            pose_coords = []
            pose_scores = []
            for i in range(hm_data.shape[0]):
                bbox = cropped_boxes[i].tolist()
                pose_coord, pose_score = self.heatmap_to_coord(
                    hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
            preds_img = torch.cat(pose_coords)
            preds_scores = torch.cat(pose_scores)
            if not self.opt.pose_track:
                boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                    pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)
                if len(preds_img) != 0:
                    preds_img = torch.stack(preds_img)

            # ====== per-detection processing ======
            if len(preds_img) != 0:
                if self.head_pose:
                    masks_list.clear()
                    emoji_available_list.clear()
                for i in range(preds_img.shape[0]):
                    self_state = None  # stays None when tracking is disabled
                    if self.opt.tracking:
                        self_state = self.get_reid_state(ids[i], reid_states, reid_global_states)
                        self_state['index'] = i
                    # ====== head pose estimation ======
                    if self.head_pose:
                        # extract the face keypoints
                        face_keypoints = preds_img[i, 26:94]
                        face_keypoints_scores = preds_scores[i, 26:94]
                        # normalized face keypoint coordinates
                        # scale_face_keypoints, _ = self.get_scaled_face_keypoints(face_keypoints)

                        # ====== face exposure check ======
                        face_naked = torch.sum(face_keypoints_scores[27:48] > 0.01) / 21  # excludes mouth data for now
                        mouth_naked = torch.sum(face_keypoints_scores[48:68] > 0.1) / 20  # exposure of the mouth area
                        if face_naked > 0.5 or mouth_naked > 0.5:
                            # enough of the face is visible to attempt emotion recognition
                            emoji_available_list.append(i)
                        # ====== metric: face occlusion detection ======
                        if self_state is not None:
                            self.face_hide(self_state, reid_global_states, face_naked)
                        # ====== run head pose estimation ======
                        self.estimate_head_pose(pose_estimator, face_keypoints, masks_list)
                        # ====== mouth-shape recognition: yawning and talking (currently disabled) ======
                        if mouth_naked > 0.5 and False:
                            scaled_mouth_keypoints, _ = self.get_scaled_mouth_keypoints(face_keypoints)
                            mouth_distance = self.mouth_open_degree(scaled_mouth_keypoints)
                            if mouth_distance[1] > 0.3:
                                open_mouth = "open mouth!!!!"
                            elif mouth_distance[1] > 0.2:
                                open_mouth = "open"
                            else:
                                open_mouth = ""
                            print(mouth_distance, open_mouth)
                        # ====== emotion recognition would start here ======
            # ====== per-detection processing done ======

            # ====== assemble results ======
            _result = []
            for k in range(len(scores)):
                _result.append({
                    'keypoints': preds_img[k],
                    'kp_score': preds_scores[k],
                    'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                    'idx': ids[k],
                    'box': [boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1]]
                })
            result = {'imgname': im_name, 'result': _result}
            if self.opt.pose_flow:
                poseflow_result = self.pose_flow_wrapper.step(orig_img, result)
                for i in range(len(poseflow_result)):
                    result['result'][i]['idx'] = poseflow_result[i]['idx']
            final_result.append(result)

            # ====== drawing ======
            if self.opt.save_img or self.save_video or self.opt.vis:
                if hm_data.size()[1] == 49:
                    from alphapose.utils.vis import vis_frame_dense as vis_frame, DEFAULT_FONT
                elif self.opt.vis_fast:
                    from alphapose.utils.vis import vis_frame_fast as vis_frame, DEFAULT_FONT
                else:
                    from alphapose.utils.vis import vis_frame, DEFAULT_FONT
                # start drawing
                img = vis_frame(orig_img, result, self.opt)
                if self.head_pose and len(masks_list) != 0:
                    for p in masks_list:
                        pose_estimator.draw_annotation_box(img, p[0], p[1], color=(128, 255, 128))
                if self.opt.tracking:
                    # person re-identification state
                    for _id in ids:
                        _state = reid_states[_id]
                        index = _state['index']
                        bbox = _result[index]['box']
                        bbox = [bbox[0], bbox[0] + bbox[2], bbox[1], bbox[1] + bbox[3]]
                        cv2.putText(img, f'no focus: {round(_state["face_hide_rate"], 2)}',
                                    (int(bbox[0]), int((bbox[2] + 52))), DEFAULT_FONT, 1, (255, 0, 0), 2)
                # done drawing: write out the image
                self.write_image(img, im_name, stream=stream if self.save_video else None)
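# ---------------------------------------------------------------------------
# Worked example of the face-exposure ratios computed above, assuming the
# 68-landmark face layout (indices 27:48 cover brows/eyes/nose, 48:68 the
# mouth); the score values here are made up for illustration.
import torch

face_keypoints_scores = torch.full((68, 1), 0.5)  # pretend every landmark is visible
face_naked = torch.sum(face_keypoints_scores[27:48] > 0.01) / 21
mouth_naked = torch.sum(face_keypoints_scores[48:68] > 0.1) / 20
emotion_possible = bool(face_naked > 0.5 or mouth_naked > 0.5)  # True here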
def update(self):
    if self.save_video:
        # initialize the file video stream, adapt output video resolution to original video
        stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
        if not stream.isOpened():
            print("Try to use other video encoders...")
            ext = self.video_save_opt['savepath'].split('.')[-1]
            fourcc, _ext = self.recognize_video_ext(ext)
            self.video_save_opt['fourcc'] = fourcc
            self.video_save_opt['savepath'] = self.video_save_opt['savepath'][:-4] + _ext
            stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            assert stream.isOpened(), 'Cannot open video for writing'
    # keep looping infinitely
    while True:
        # ensure the queue is not empty and get item
        (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue)
        if orig_img is None:
            # if the thread indicator variable is set (img is None), stop the thread
            self.wait_and_put(self.final_result_queue, None)
            if self.save_video:
                stream.release()
            return
        # image channel RGB->BGR
        orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
        if boxes is None:
            if self.opt.save_img or self.save_video or self.opt.vis:
                self.write_image(orig_img, im_name, stream=stream if self.save_video else None)
        else:
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)
            pred = hm_data.cpu().data.numpy()
            assert pred.ndim == 4
            pose_coords = []
            pose_scores = []
            for i in range(hm_data.shape[0]):
                bbox = cropped_boxes[i].tolist()
                pose_coord, pose_score = self.heatmap_to_coord(pred[i][EVAL_JOINTS], bbox)
                pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
            preds_img = torch.cat(pose_coords)
            preds_scores = torch.cat(pose_scores)
            result = pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)
            result = {'imgname': im_name, 'result': result}
            if self.opt.pose_track:
                poseflow_result = self.pose_flow_wrapper.step(orig_img, result)
                for i in range(len(poseflow_result)):
                    result['result'][i]['idx'] = poseflow_result[i]['idx']
            self.wait_and_put(self.final_result_queue, result)
            if self.opt.save_img or self.save_video or self.opt.vis:
                if self.opt.vis_fast:
                    from alphapose.utils.vis import vis_frame_fast as vis_frame
                else:
                    from alphapose.utils.vis import vis_frame
                img = vis_frame(orig_img, result, add_bbox=(self.opt.pose_track | self.opt.tracking))
                self.write_image(img, im_name, stream=stream if self.save_video else None)
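# ---------------------------------------------------------------------------
# Sketch of the queue protocol all of these writers assume (names and queue
# size are illustrative): the producer pushes one 7-tuple per frame and an
# all-None tuple as the stop sentinel, detected via `orig_img is None`.
from queue import Queue
import numpy as np

result_queue = Queue(maxsize=64)
frame_rgb = np.zeros((480, 640, 3), dtype=np.uint8)
# per-frame item: (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name)
result_queue.put((None, None, None, None, None, frame_rgb, 'frame_0001.jpg'))
result_queue.put((None, None, None, None, None, None, None))  # stop sentinel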
def update(self):
    final_result = []
    norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
    hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
    if self.save_video:
        # initialize the file video stream, adapt output video resolution to original video
        stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
        if not stream.isOpened():
            print("Try to use other video encoders...")
            ext = self.video_save_opt['savepath'].split('.')[-1]
            fourcc, _ext = self.recognize_video_ext(ext)
            self.video_save_opt['fourcc'] = fourcc
            self.video_save_opt['savepath'] = self.video_save_opt['savepath'][:-4] + _ext
            stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            assert stream.isOpened(), 'Cannot open video for writing'
    # keep looping infinitely
    while True:
        # ensure the queue is not empty and get item
        (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue)
        if orig_img is None:
            # if the thread indicator variable is set (img is None), stop the thread
            if self.save_video:
                stream.release()
            write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval)
            print("Results have been written to json.")
            return
        # image channel RGB->BGR
        orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
        if boxes is None or len(boxes) == 0:
            if self.opt.save_img or self.save_video or self.opt.vis:
                self.write_image(orig_img, im_name, stream=stream if self.save_video else None)
        else:
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)
            assert hm_data.dim() == 4
            # pred = hm_data.cpu().data.numpy()
            if hm_data.size()[1] == 136:
                self.eval_joints = [*range(0, 136)]
            elif hm_data.size()[1] == 26:
                self.eval_joints = [*range(0, 26)]
            pose_coords = []
            pose_scores = []
            for i in range(hm_data.shape[0]):
                bbox = cropped_boxes[i].tolist()
                pose_coord, pose_score = self.heatmap_to_coord(
                    hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
            preds_img = torch.cat(pose_coords)
            preds_scores = torch.cat(pose_scores)
            if not self.opt.pose_track:
                boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                    pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)
            _result = []
            for k in range(len(scores)):
                _result.append({
                    'keypoints': preds_img[k],
                    'kp_score': preds_scores[k],
                    'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                    'idx': ids[k],
                    'box': [boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1]]
                })
                box_img = orig_img[int(boxes[k][1]):int(boxes[k][3]), int(boxes[k][0]):int(boxes[k][2])]
                orig_img = orig_img.copy()
                pose_coord_array = pose_coords[k].numpy()
                # COCO-style keypoint names, drawn next to each joint
                pose_text = ['Nose', 'LEye', 'REye', 'LEar', 'REar',
                             'LShoulder', 'RShoulder', 'LElbow', 'RElbow',
                             'LWrist', 'RWrist', 'LHip', 'RHip',
                             'LKnee', 'RKnee', 'LAnkle', 'RAnkle']
                for i in range(len(pose_text)):
                    # cv2.putText expects integer pixel coordinates
                    orig_img = cv2.putText(orig_img, pose_text[i],
                                           (int(pose_coord_array[0][i][0]), int(pose_coord_array[0][i][1])),
                                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                if self.is_fall(pose_coord_array):
                    print('falling')
                    cv2.rectangle(orig_img, (int(boxes[k][0]), int(boxes[k][1])),
                                  (int(boxes[k][2]), int(boxes[k][3])), (0, 0, 255), 2)
                    fall_img = orig_img[int(boxes[k][1]):int(boxes[k][3]), int(boxes[k][0]):int(boxes[k][2])]
                    if not os.path.exists(self.opt.outputpath + '/fall'):
                        os.mkdir(self.opt.outputpath + '/fall')
                    # cv2.imshow("fall", fall_img)
                    fall_img_name = os.path.join(self.opt.outputpath, 'fall', str(time.time()) + '.jpg')
                    cv2.imwrite(fall_img_name, fall_img)
                else:
                    print('not falling')
                    cv2.rectangle(orig_img, (int(boxes[k][0]), int(boxes[k][1])),
                                  (int(boxes[k][2]), int(boxes[k][3])), (0, 255, 0), 2)
            result = {'imgname': im_name, 'result': _result}
            if self.opt.pose_flow:
                poseflow_result = self.pose_flow_wrapper.step(orig_img, result)
                for i in range(len(poseflow_result)):
                    result['result'][i]['idx'] = poseflow_result[i]['idx']
            final_result.append(result)
            if self.opt.save_img or self.save_video or self.opt.vis:
                if hm_data.size()[1] == 49:
                    from alphapose.utils.vis import vis_frame_dense as vis_frame
                elif self.opt.vis_fast:
                    from alphapose.utils.vis import vis_frame_fast as vis_frame
                else:
                    from alphapose.utils.vis import vis_frame
                img = vis_frame(orig_img, result, self.opt)
                self.write_image(img, im_name, stream=stream if self.save_video else None)
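# ---------------------------------------------------------------------------
# `is_fall` is referenced above but not shown; a plausible minimal heuristic
# (an assumption, not the author's method) flags a fall when the torso axis is
# closer to horizontal than vertical, using COCO indices 5/6 (shoulders) and
# 11/12 (hips).
import numpy as np

def is_fall_sketch(pose_coord_array):
    kp = pose_coord_array[0]                  # (17, 2) keypoints for one person
    shoulder_c = (kp[5] + kp[6]) / 2.0        # shoulder midpoint
    hip_c = (kp[11] + kp[12]) / 2.0           # hip midpoint
    dx = abs(float(hip_c[0] - shoulder_c[0]))
    dy = abs(float(hip_c[1] - shoulder_c[1]))
    return dx > dy                            # torso nearer horizontal -> possible fall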
def update(self):
    final_result = []
    norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
    hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
    if self.save_video:
        # initialize the file video stream, adapt output video resolution to original video
        stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
        if not stream.isOpened():
            print("Try to use other video encoders...")
            ext = self.video_save_opt['savepath'].split('.')[-1]
            fourcc, _ext = self.recognize_video_ext(ext)
            self.video_save_opt['fourcc'] = fourcc
            self.video_save_opt['savepath'] = self.video_save_opt['savepath'][:-4] + _ext
            stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            assert stream.isOpened(), 'Cannot open video for writing'
    # keep looping infinitely
    while True:
        # ensure the queue is not empty and get item
        (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue)
        if orig_img is None:
            # if the thread indicator variable is set (img is None), stop the thread
            if self.save_video:
                stream.release()
            write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval)
            print("Results have been written to json.")
            return
        # image channel RGB->BGR
        orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
        if boxes is None or len(boxes) == 0:
            if self.opt.save_img or self.save_video or self.opt.vis:
                self.write_image(orig_img, im_name, stream=stream if self.save_video else None)
        else:
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)
            assert hm_data.dim() == 4
            # pred = hm_data.cpu().data.numpy()
            if hm_data.size()[1] == 136:
                self.eval_joints = [*range(0, 136)]
            elif hm_data.size()[1] == 26:
                self.eval_joints = [*range(0, 26)]
            pose_coords = []
            pose_scores = []
            for i in range(hm_data.shape[0]):
                bbox = cropped_boxes[i].tolist()
                pose_coord, pose_score = self.heatmap_to_coord(
                    hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
            preds_img = torch.cat(pose_coords)
            preds_scores = torch.cat(pose_scores)
            if not self.opt.pose_track:
                boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                    pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)
            _result = []
            for k in range(len(scores)):
                _result.append({
                    'keypoints': preds_img[k],
                    'kp_score': preds_scores[k],
                    'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                    'idx': ids[k],
                    'box': [boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1]]
                })
            result = {'imgname': im_name, 'result': _result}
            if self.opt.pose_flow:
                poseflow_result = self.pose_flow_wrapper.step(orig_img, result)
                for i in range(len(poseflow_result)):
                    result['result'][i]['idx'] = poseflow_result[i]['idx']
            final_result.append(result)
            if len(self.opt.server) > 0:
                import requests, json, copy
                if self.opt.post_image:
                    result = trans_to_json([result])
                    img = self.act_model.solve_image(result, orig_img)
                    frame_encoded = cv2.imencode(".jpg", img)[1]
                    files = {'image': frame_encoded.tobytes()}  # raw JPEG bytes
                    for idx, person in enumerate(result):  # result (list of dict)
                        person.pop("keypoints")
                        person.pop("image_id")
                        person.pop("category_id")
                        person.pop("score")
                        person.pop("idx")
                    requests.post(self.opt.server, files=files, data={"result": json.dumps(result)})
                else:
                    result = trans_to_json([result])
                    self.act_model.make_act_score(result)
                    for idx, person in enumerate(result):
                        person.pop("keypoints")
                        person.pop("image_id")
                        person.pop("category_id")
                        person.pop("score")
                        person.pop("idx")
                    requests.post(self.opt.server, json=result)
            if self.opt.save_img or self.save_video or self.opt.vis:
                result = trans_to_json([result])
                img = self.act_model.solve_image(result, orig_img)
                self.write_image(img, im_name, stream=stream if self.save_video else None)
def update(self):
    norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
    hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE

    # get item
    (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.item
    if orig_img is None:
        return None
    # image channel RGB->BGR
    orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
    self.orig_img = orig_img
    if boxes is None or len(boxes) == 0:
        return None
    else:
        # location prediction (n, kp, 2) | score prediction (n, kp, 1)
        assert hm_data.dim() == 4
        if hm_data.size()[1] == 136:
            self.eval_joints = [*range(0, 136)]
        elif hm_data.size()[1] == 26:
            self.eval_joints = [*range(0, 26)]
        elif hm_data.size()[1] == 133:
            self.eval_joints = [*range(0, 133)]
        pose_coords = []
        pose_scores = []
        for i in range(hm_data.shape[0]):
            bbox = cropped_boxes[i].tolist()
            if isinstance(self.heatmap_to_coord, list):
                pose_coords_body_foot, pose_scores_body_foot = self.heatmap_to_coord[0](
                    hm_data[i][self.eval_joints[:-110]], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coords_face_hand, pose_scores_face_hand = self.heatmap_to_coord[1](
                    hm_data[i][self.eval_joints[-110:]], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coord = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0)
                pose_score = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0)
            else:
                pose_coord, pose_score = self.heatmap_to_coord(
                    hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
            pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
            pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
        preds_img = torch.cat(pose_coords)
        preds_scores = torch.cat(pose_scores)

        boxes, scores, ids, preds_img, preds_scores, pick_ids = \
            pose_nms(boxes, scores, ids, preds_img, preds_scores,
                     self.opt.min_box_area, use_heatmap_loss=self.use_heatmap_loss)

        _result = []
        for k in range(len(scores)):
            _result.append({
                'keypoints': preds_img[k],
                'kp_score': preds_scores[k],
                'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                'idx': ids[k],
                'bbox': [boxes[k][0], boxes[k][1],
                         boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1]]
            })
        result = {'imgname': im_name, 'result': _result}

        if hm_data.size()[1] == 49:
            from alphapose.utils.vis import vis_frame_dense as vis_frame
        elif self.opt.vis_fast:
            from alphapose.utils.vis import vis_frame_fast as vis_frame
        else:
            from alphapose.utils.vis import vis_frame
        self.vis_frame = vis_frame
    return result
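# ---------------------------------------------------------------------------
# The 'proposal_score' used in every variant above combines detector and
# keypoint confidence: mean(kp_score) + det_score + 1.25 * max(kp_score).
# Tiny worked example with made-up numbers:
import torch

kp_score = torch.tensor([[0.9], [0.8], [0.7]])   # (K, 1) keypoint confidences
det_score = torch.tensor(0.95)                   # box confidence
proposal_score = torch.mean(kp_score) + det_score + 1.25 * max(kp_score)
# 0.8 + 0.95 + 1.25 * 0.9 = 2.875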
def validate(m, heatmap_to_coord, batch_size=20):
    det_dataset = builder.build_dataset(cfg.DATASET.TEST, preset_cfg=cfg.DATA_PRESET, train=False, opt=opt)
    eval_joints = det_dataset.EVAL_JOINTS

    det_loader = torch.utils.data.DataLoader(
        det_dataset, batch_size=batch_size, shuffle=False, num_workers=20, drop_last=False)
    kpt_json = []
    m.eval()

    norm_type = cfg.LOSS.get('NORM_TYPE', None)
    hm_size = cfg.DATA_PRESET.HEATMAP_SIZE
    combined_loss = (cfg.LOSS.get('TYPE') == 'Combined')

    halpe = (cfg.DATA_PRESET.NUM_JOINTS == 133) or (cfg.DATA_PRESET.NUM_JOINTS == 136)

    for inps, crop_bboxes, bboxes, img_ids, scores, imghts, imgwds in tqdm(det_loader, dynamic_ncols=True):
        if isinstance(inps, list):
            inps = [inp.cuda() for inp in inps]
        else:
            inps = inps.cuda()
        output = m(inps)
        if opt.flip_test:
            if isinstance(inps, list):
                inps_flip = [flip(inp).cuda() for inp in inps]
            else:
                inps_flip = flip(inps).cuda()
            output_flip = flip_heatmap(m(inps_flip), det_dataset.joint_pairs, shift=True)
            pred_flip = output_flip[:, eval_joints, :, :]
        else:
            output_flip = None
            pred_flip = None

        pred = output
        assert pred.dim() == 4
        pred = pred[:, eval_joints, :, :]

        if output.size()[1] == 68:
            face_hand_num = 42
        else:
            face_hand_num = 110

        for i in range(output.shape[0]):
            bbox = crop_bboxes[i].tolist()
            if combined_loss:
                pose_coords_body_foot, pose_scores_body_foot = heatmap_to_coord[0](
                    pred[i][det_dataset.EVAL_JOINTS[:-face_hand_num]], bbox,
                    hm_shape=hm_size, norm_type=norm_type,
                    hms_flip=pred_flip[i][det_dataset.EVAL_JOINTS[:-face_hand_num]] if pred_flip is not None else None)
                pose_coords_face_hand, pose_scores_face_hand = heatmap_to_coord[1](
                    pred[i][det_dataset.EVAL_JOINTS[-face_hand_num:]], bbox,
                    hm_shape=hm_size, norm_type=norm_type,
                    hms_flip=pred_flip[i][det_dataset.EVAL_JOINTS[-face_hand_num:]] if pred_flip is not None else None)
                pose_coords = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0)
                pose_scores = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0)
            else:
                pose_coords, pose_scores = heatmap_to_coord(
                    pred[i][det_dataset.EVAL_JOINTS], bbox, hm_shape=hm_size, norm_type=norm_type,
                    hms_flip=pred_flip[i][det_dataset.EVAL_JOINTS] if pred_flip is not None else None)

            keypoints = np.concatenate((pose_coords, pose_scores), axis=1)
            keypoints = keypoints.reshape(-1).tolist()

            data = dict()
            data['bbox'] = bboxes[i, 0].tolist()
            data['image_id'] = int(img_ids[i])
            data['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            data['score'] = float(scores[i] + np.mean(pose_scores) + 1.25 * np.max(pose_scores))
            # data['score'] = float(scores[i])
            data['category_id'] = 1
            data['keypoints'] = keypoints

            kpt_json.append(data)

    if opt.ppose_nms:
        from alphapose.utils.pPose_nms import ppose_nms_validate_preprocess, pose_nms, write_json
        final_result = []
        tmp_data = ppose_nms_validate_preprocess(kpt_json)
        for key in tmp_data:
            boxes, scores, ids, preds_img, preds_scores = tmp_data[key]
            boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                pose_nms(boxes, scores, ids, preds_img, preds_scores, 0, cfg.LOSS.get('TYPE') == 'MSELoss')
            _result = []
            for k in range(len(scores)):
                _result.append({
                    'keypoints': preds_img[k],
                    'kp_score': preds_scores[k],
                    'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                    'idx': ids[k],
                    'box': [boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1]]
                })
            im_name = str(key).zfill(12) + '.jpg'
            result = {'imgname': im_name, 'result': _result}
            final_result.append(result)
        write_json(final_result, './exp/json/', form='coco', for_eval=True, outputfile='validate_rcnn_kpt.json')
    else:
        if opt.oks_nms:
            from alphapose.utils.pPose_nms import oks_pose_nms
            kpt_json = oks_pose_nms(kpt_json)
        with open('./exp/json/validate_rcnn_kpt.json', 'w') as fid:
            json.dump(kpt_json, fid)

    sysout = sys.stdout
    res = evaluate_mAP('./exp/json/validate_rcnn_kpt.json', ann_type='keypoints',
                       ann_file=os.path.join(cfg.DATASET.TEST.ROOT, cfg.DATASET.TEST.ANN), halpe=halpe)
    sys.stdout = sysout
    return res
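# ---------------------------------------------------------------------------
# `evaluate_mAP` is defined elsewhere in the repo; a minimal pycocotools
# equivalent for the JSON written above (assuming COCO-format keypoint
# annotations; the ground-truth path is a placeholder):
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO('person_keypoints_val2017.json')  # placeholder annotation file
coco_dt = coco_gt.loadRes('./exp/json/validate_rcnn_kpt.json')
coco_eval = COCOeval(coco_gt, coco_dt, iouType='keypoints')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()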
def update(self):
    # ---- measurement configuration ----
    person_height = 165
    frame_offset = 20
    max_diff_angle = 15
    max_diff_distance = 10
    N_angle = 23
    N_distance = 20

    frames = []
    ground_points = []
    head_points = []
    final_result = []
    final_angles = {'Frame': []}
    final_min_angles = {'Frame': []}
    final_max_angles = {'Frame': []}
    final_distances = {'Frame': []}
    final_min_distances = {'Frame': []}
    final_max_distances = {'Frame': []}

    for i in range(1, N_angle + 1):
        final_angles['Angle_' + str(i)] = []
        final_min_angles['Angle_' + str(i)] = []
        final_max_angles['Angle_' + str(i)] = []
    for i in range(1, N_distance + 1):
        final_distances['Distance_' + str(i)] = []
        final_min_distances['Distance_' + str(i)] = []
        final_max_distances['Distance_' + str(i)] = []

    frame = 0
    min_angle = 180
    max_angle = 0
    min_distance = person_height + 100
    max_distance = 0
    # ---- end measurement configuration ----

    norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
    hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
    if self.save_video:
        # initialize the file video stream, adapt output video resolution to original video
        stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
        if not stream.isOpened():
            print("Try to use other video encoders...")
            ext = self.video_save_opt['savepath'].split('.')[-1]
            fourcc, _ext = self.recognize_video_ext(ext)
            self.video_save_opt['fourcc'] = fourcc
            self.video_save_opt['savepath'] = self.video_save_opt['savepath'][:-4] + _ext
            stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            assert stream.isOpened(), 'Cannot open video for writing'
    # keep looping infinitely
    while True:
        # ensure the queue is not empty and get item
        (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue)
        if orig_img is None:
            # if the thread indicator variable is set (img is None), stop the thread
            if self.save_video:
                stream.release()
            write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval)
            print("Results have been written to json.")
            return
        # image channel RGB->BGR
        orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
        if boxes is None or len(boxes) == 0:
            if self.opt.save_img or self.save_video or self.opt.vis:
                self.write_image(orig_img, im_name, stream=stream if self.save_video else None)
        else:
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)
            assert hm_data.dim() == 4
            # pred = hm_data.cpu().data.numpy()
            if hm_data.size()[1] == 136:
                self.eval_joints = [*range(0, 136)]
            elif hm_data.size()[1] == 26:
                self.eval_joints = [*range(0, 26)]
            pose_coords = []
            pose_scores = []
            for i in range(hm_data.shape[0]):
                bbox = cropped_boxes[i].tolist()
                pose_coord, pose_score = self.heatmap_to_coord(
                    hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
            preds_img = torch.cat(pose_coords)
            preds_scores = torch.cat(pose_scores)
            if not self.opt.pose_track:
                boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                    pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)
            _result = []
            for k in range(len(scores)):
                _result.append({
                    'keypoints': preds_img[k],
                    'kp_score': preds_scores[k],
                    'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                    'idx': ids[k],
                    'box': [boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1]]
                })
            result = {'imgname': im_name, 'result': _result}
            if self.opt.pose_flow:
                poseflow_result = self.pose_flow_wrapper.step(orig_img, result)
                for i in range(len(poseflow_result)):
                    result['result'][i]['idx'] = poseflow_result[i]['idx']
            final_result.append(result)
            if self.opt.save_img or self.save_video or self.opt.vis:
                if hm_data.size()[1] == 49:
                    from alphapose.utils.vis import vis_frame_dense as vis_frame
                elif self.opt.vis_fast:
                    from alphapose.utils.vis import vis_frame_fast as vis_frame
                else:
                    from alphapose.utils.vis import vis_frame
                img = vis_frame(orig_img, result, self.opt)

                # ---- per-frame measurements ----
                frame += 1
                if frame <= frame_offset:
                    # during warmup, average the ground/head anchor points
                    ground_point, head_point = self.calc_bound_points(result, vis_thres=0.4)
                    if ground_point is not None:
                        ground_points.append(ground_point)
                        x_point = [x for x, _ in ground_points]
                        y_point = [y for _, y in ground_points]
                        ground_point = (int(np.average(x_point)), int(np.average(y_point)))
                    if head_point is not None:
                        head_points.append(head_point)
                        x_point = [x for x, _ in head_points]
                        y_point = [y for _, y in head_points]
                        head_point = (int(np.average(x_point)), int(np.average(y_point)))
                if ground_point is not None and head_point is not None:
                    dist_height = np.linalg.norm(np.array(head_point) - np.array(ground_point))
                    height_ratio = person_height / (dist_height + 1e-6)
                else:
                    height_ratio = 0

                distances = self.calc_distances(result, ground_point, head_point, height_ratio, vis_thres=0.4)
                angles = self.calc_angles(result, vis_thres=0.4)

                frames.append(frame)
                final_angles['Frame'].append(frame)
                final_min_angles['Frame'].append(frame)
                final_max_angles['Frame'].append(frame)
                final_distances['Frame'].append(frame)
                final_min_distances['Frame'].append(frame)
                final_max_distances['Frame'].append(frame)

                # angles
                for angle_name, angle in angles.items():
                    angle = int(angle)
                    if angle < 0 and frame > frame_offset:
                        angle = final_angles[angle_name][frame - 2]
                    final_angles[angle_name].append(angle)
                    if frame <= frame_offset:
                        if angle >= 0 and angle < min_angle:
                            final_min_angles[angle_name].append(angle)
                        else:
                            final_min_angles[angle_name].append(min_angle)
                        if angle >= 0 and angle > max_angle:
                            final_max_angles[angle_name].append(angle)
                        else:
                            final_max_angles[angle_name].append(max_angle)
                    else:
                        previous_min_angle = final_min_angles[angle_name][frame - 2]
                        previous_max_angle = final_max_angles[angle_name][frame - 2]
                        diff_angle = abs(final_angles[angle_name][frame - 1] - final_angles[angle_name][frame - 2])
                        if angle >= 0 and angle < previous_min_angle and diff_angle < max_diff_angle:
                            final_min_angles[angle_name].append(angle)
                        else:
                            final_min_angles[angle_name].append(previous_min_angle)
                        if angle >= 0 and angle > previous_max_angle and diff_angle < max_diff_angle:
                            final_max_angles[angle_name].append(angle)
                        else:
                            final_max_angles[angle_name].append(previous_max_angle)

                    plt.figure()
                    plt.plot(frames[frame_offset + 1:], final_angles[angle_name][frame_offset + 1:])
                    plt.plot(frames[frame_offset + 1:], final_min_angles[angle_name][frame_offset + 1:],
                             linestyle='--', dashes=(5, 3))
                    plt.plot(frames[frame_offset + 1:], final_max_angles[angle_name][frame_offset + 1:],
                             linestyle='--', dashes=(5, 3))
                    plt.xlabel('Frames')
                    plt.ylabel('Angle (degree)')
                    plt.title(angle_name)
                    plt.grid(True)
                    plt.savefig(os.path.join(self.opt.outputpath_plot, angle_name + ".jpg"))
                    plt.close()

                # distances
                for distance_name, distance in distances.items():
                    distance = round(distance, 2)
                    if distance < 0 and frame > frame_offset:
                        distance = final_distances[distance_name][frame - 2]
                    final_distances[distance_name].append(distance)
                    if frame <= frame_offset:
                        if distance >= 0 and distance < min_distance:
                            final_min_distances[distance_name].append(distance)
                        else:
                            final_min_distances[distance_name].append(min_distance)
                        if distance >= 0 and distance > max_distance:
                            final_max_distances[distance_name].append(distance)
                        else:
                            final_max_distances[distance_name].append(max_distance)
                    else:
                        previous_min_distance = final_min_distances[distance_name][frame - 2]
                        previous_max_distance = final_max_distances[distance_name][frame - 2]
                        diff_distance = abs(final_distances[distance_name][frame - 1]
                                            - final_distances[distance_name][frame - 2])
                        if distance_name == 'Distance_10' or distance_name == 'Distance_11':
                            diff_distance *= 100
                        if distance >= 0 and distance < previous_min_distance and diff_distance < max_diff_distance:
                            final_min_distances[distance_name].append(distance)
                        else:
                            final_min_distances[distance_name].append(previous_min_distance)
                        if distance >= 0 and distance > previous_max_distance and diff_distance < max_diff_distance:
                            final_max_distances[distance_name].append(distance)
                        else:
                            final_max_distances[distance_name].append(previous_max_distance)

                    plt.figure()
                    plt.plot(frames[frame_offset + 1:], final_distances[distance_name][frame_offset + 1:])
                    plt.plot(frames[frame_offset + 1:], final_min_distances[distance_name][frame_offset + 1:],
                             linestyle='--', dashes=(5, 3))
                    plt.plot(frames[frame_offset + 1:], final_max_distances[distance_name][frame_offset + 1:],
                             linestyle='--', dashes=(5, 3))
                    plt.xlabel('Frames')
                    plt.ylabel('Distance (cm)')
                    plt.title(distance_name)
                    plt.grid(True)
                    plt.savefig(os.path.join(self.opt.outputpath_plot, distance_name + ".jpg"))
                    plt.close()

                # export tables
                df_angle = pd.DataFrame.from_dict(final_angles)
                df_min_angle = pd.DataFrame.from_dict(final_min_angles)
                df_max_angle = pd.DataFrame.from_dict(final_max_angles)
                with pd.ExcelWriter(os.path.join(self.opt.outputpath_plot, "Angles.xlsx")) as writer:
                    df_angle.to_excel(writer, sheet_name='Angles', index=False)
                    df_min_angle.to_excel(writer, sheet_name='Min_Angles', index=False)
                    df_max_angle.to_excel(writer, sheet_name='Max_Angles', index=False)

                df_distance = pd.DataFrame.from_dict(final_distances)
                df_min_distance = pd.DataFrame.from_dict(final_min_distances)
                df_max_distance = pd.DataFrame.from_dict(final_max_distances)
                with pd.ExcelWriter(os.path.join(self.opt.outputpath_plot, "Distances.xlsx")) as writer:
                    df_distance.to_excel(writer, sheet_name='Distances', index=False)
                    df_min_distance.to_excel(writer, sheet_name='Min_Distances', index=False)
                    df_max_distance.to_excel(writer, sheet_name='Max_Distances', index=False)
                # ---- end per-frame measurements ----

                self.write_image(img, im_name, stream=stream if self.save_video else None, frame=frame)
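# ---------------------------------------------------------------------------
# The bookkeeping above maintains running min/max envelopes per angle/distance,
# accepting a new extreme only when the frame-to-frame jump stays below a
# threshold, so isolated tracking spikes are ignored. Stand-alone sketch of
# that update rule (names illustrative; assumes at least two samples):
def update_envelope(series, mins, maxs, max_diff):
    value, prev = series[-1], series[-2]
    stable = abs(value - prev) < max_diff            # reject sudden jumps
    mins.append(value if (value >= 0 and value < mins[-1] and stable) else mins[-1])
    maxs.append(value if (value >= 0 and value > maxs[-1] and stable) else maxs[-1])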