def boxes_nms(flow_boxes, cur_boxes, previous_ids):
    """Merge flow-propagated boxes with current detections and assign ids.

    Boxes are paired by ``bipartite_matching_greedy`` on the matrix returned
    by ``boxes_similarity``. The merged result lists the matched flow boxes
    first, then the flow boxes without a partner, then the detections without
    a partner.

    Args:
        flow_boxes: array of shape (N, 4), boxes propagated from the previous
            frame.
        cur_boxes: array of shape (M, 4), boxes detected in the current frame.
        previous_ids: ids belonging to ``flow_boxes``; forwarded unchanged to
            ``match_ids``.

    Returns:
        Tuple ``(nms_boxes, nms_ids)``. ``nms_boxes`` has N + M - K rows for
        K matched pairs; ``nms_ids`` is whatever ``match_ids`` returns when
        asked for that many entries.
    """
    boxes_similarity_matrix = boxes_similarity(flow_boxes, cur_boxes)
    flow_filter_ids, cur_ids = bipartite_matching_greedy(
        boxes_similarity_matrix)

    cur_len = len(flow_boxes) + len(cur_boxes) - len(cur_ids)
    nms_ids = match_ids(previous_ids, flow_filter_ids, cur_ids, cur_len)

    # Select the leftover rows by *index*. np.setdiff1d on the box arrays
    # themselves flattens them and compares individual coordinates, which
    # does not yield whole boxes (and may not even reshape to (-1, 4)).
    unmatched_flow = np.setdiff1d(np.arange(len(flow_boxes)), flow_filter_ids)
    unmatched_cur = np.setdiff1d(np.arange(len(cur_boxes)), cur_ids)

    nms_boxes = np.concatenate(
        (flow_boxes[flow_filter_ids, ],
         flow_boxes[unmatched_flow],
         cur_boxes[unmatched_cur]), 0)
    return nms_boxes, nms_ids
def boxes_nms_test(flow_boxes, cur_boxes, image_resolution):
    """Merge current detections with flow boxes the detector may have missed.

    Flow boxes and detections are paired by ``bipartite_matching_greedy`` on
    the ``boxes_similarity`` matrix of their first four columns. The result
    contains, in order: the matched detections, the unmatched detections
    (treated as newly appeared people), and every unmatched flow box whose
    area is at least one tenth of the image area (treated as a person the
    detector missed). Smaller unmatched flow boxes are reported as
    disappeared and dropped.

    Args:
        flow_boxes: array of shape (N, 5); columns are x0, y0, x1, y1, score.
        cur_boxes: array of shape (M, 5), same column layout.
        image_resolution: ``(W, H)`` of the processed frame.

    Returns:
        Array of the retained boxes (5 columns). No ids are returned.
    """
    boxes_similarity_matrix = boxes_similarity(flow_boxes[..., :4],
                                               cur_boxes[..., :4])
    flow_filter_ids, cur_ids = bipartite_matching_greedy(
        boxes_similarity_matrix)

    # The matcher already tells us which rows were paired, so the leftovers
    # are obtained by deleting those indices. Comparing row contents instead
    # would also discard an unmatched box that happens to equal a matched one.
    matched_cur = cur_boxes[cur_ids, ]
    new_cur = np.delete(cur_boxes, np.asarray(cur_ids, dtype=np.intp), 0)
    nms_boxes = np.concatenate((matched_cur, new_cur), 0)

    lost_flow = np.delete(flow_boxes,
                          np.asarray(flow_filter_ids, dtype=np.intp), 0)
    min_area = 1 / 10 * image_resolution[0] * image_resolution[1]
    for item_box in lost_flow:
        # Pass the four coordinates x0, y0, x1, y1; slicing with [1:] would
        # drop x0 and hand the score to box_area instead.
        if box_area(item_box[:4]) < min_area:
            # P1: the person really left the frame -> discard the box.
            print('flow box dispear...')
        else:
            # P2: the detector missed the person -> keep the flow box.
            nms_boxes = np.concatenate(
                (nms_boxes, np.expand_dims(item_box, 0)), 0)
    return nms_boxes
def boxes_nms_test(flow_boxes, cur_boxes, previous_ids, image_resolution):
    """Merge flow boxes with current detections and assign a track id per box.

    Flow boxes and detections are paired by ``bipartite_matching_greedy`` on
    the ``boxes_similarity`` matrix of their first four columns. The result
    contains, in order: the matched flow boxes, the unmatched flow boxes, and
    every unmatched detection whose ``box_area`` is at least one tenth of the
    image area. Smaller unmatched detections are reported and dropped.

    Matched boxes inherit the id of their flow box from ``previous_ids``;
    every other retained box receives a fresh id taken from the module-level
    counter ``max_id``, which this function increments.

    Args:
        flow_boxes: array of shape (N, 5); columns are x0, y0, x1, y1, score.
        cur_boxes: array of shape (M, 5), same column layout.
        previous_ids: ids of ``flow_boxes``, in the same row order.
        image_resolution: ``(W, H)`` of the processed frame.

    Returns:
        Tuple ``(nms_boxes, nms_ids)``: the retained boxes and a list of int
        ids, one per row of ``nms_boxes``.
    """
    global max_id
    boxes_similarity_matrix = boxes_similarity(flow_boxes[..., :4],
                                               cur_boxes[..., :4])
    flow_filter_ids, cur_ids = bipartite_matching_greedy(
        boxes_similarity_matrix)

    # Leftover rows are selected by index. np.setdiff1d on the box arrays
    # flattens them and compares single numbers, so it cannot return whole
    # boxes (and its result may not reshape to (-1, 5)).
    unmatched_flow = np.delete(flow_boxes,
                               np.asarray(flow_filter_ids, dtype=np.intp), 0)
    unmatched_cur = np.delete(cur_boxes,
                              np.asarray(cur_ids, dtype=np.intp), 0)

    nms_boxes = np.concatenate(
        (flow_boxes[flow_filter_ids, ], unmatched_flow), 0)

    min_area = 1 / 10 * image_resolution[0] * image_resolution[1]
    for item_box in unmatched_cur:
        if box_area(item_box) < min_area:
            # P1: box considered gone -> discard.
            print('flow box dispear...')
            # NOTE(review): debugging hook left in place; it runs every time
            # a box is discarded. Confirm whether it should stay.
            pdb()
        else:
            # P2: keep the box.
            nms_boxes = np.concatenate(
                (nms_boxes, np.expand_dims(item_box, 0)), 0)

    cur_len = len(nms_boxes)
    cur_maps = -np.ones(shape=(cur_len, ))
    # The first len(cur_ids) rows are the matched flow boxes, in match order.
    for pos in range(len(cur_ids)):
        cur_maps[pos] = previous_ids[flow_filter_ids[pos]]
    # NOTE(review): unmatched flow boxes also get fresh ids here rather than
    # their entry in previous_ids - confirm this is intended.
    for i in range(cur_len):
        if cur_maps[i] == -1.:
            max_id += 1
            cur_maps[i] = max_id

    # int64 instead of uint8: uint8 silently wraps ids above 255.
    nms_ids = cur_maps.astype(np.int64).tolist()
    return nms_boxes, nms_ids
def main():
    """Run detection, optical-flow propagation, pose estimation and tracking.

    Reads frames from ``args.video_input`` (or camera 0 when ``args.camera``
    is set), resizes them so both sides are multiples of 64, and for each
    frame: propagates the previous keypoints/boxes with the flow model,
    detects people with ``yolo_det``, merges both box sets with
    ``boxes_nms_test``, runs the pose model, and assigns pose ids with
    ``pose_match_ids``. The annotated frame is shown when ``args.display``
    is set.

    NOTE(review): the annotated ``image`` is never passed to ``out.write``;
    only the raw frame is written, and only on the exception path.
    NOTE(review): ``human_model`` is not assigned in this function -
    presumably a module-level global; confirm.
    """
    # NOTE(review): max_id is declared global but not assigned in this body.
    global max_id
    args = parse_args()
    update_config(cfg, args)
    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000
    # NOTE(review): ret_val is never checked, here or inside the loop.
    ret_val, input_image = cam.read()
    # Keep width and height multiples of 64, as needed by flownet2
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int(
        (input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
    image_resolution = (resize_W, resize_H)
    print(resize_W, resize_H)
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))
    #### load optical flow model
    flow_model = load_model()
    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()
    # The first frame was consumed above, hence video_length - 1 iterations.
    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))
        try:
            if i > 0:
                # NOTE(review): debugging hook, invoked on every frame after
                # the first.
                pdb()
                # Propagate the previous frame's keypoints/boxes into this
                # frame using the optical flow between the two images.
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxes, flow_kpts = flow_propagation(
                    prev_kpts, flow_result)
                flow_boxes = np.concatenate(
                    (flow_boxes, np.expand_dims(prev_boxes[..., 4], -1)),
                    -1)  # flow_boxes + previous boxes scores
                flow_kpts = np.concatenate((flow_kpts, prev_kpts_scores), -1)
            # boxes_threthold is 0.9
            detected_boxes, detected_scores = yolo_det(
                input_image, human_model)  # bbox is coordinate location
            # Append the score as a fifth column of each detected box.
            detected_scores = np.expand_dims(detected_scores.flatten(), -1)
            detected_boxes = np.concatenate((detected_boxes, detected_scores),
                                            -1)
            # (N, 17, 3)
            if i == 0:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, detected_boxes[..., :4],
                    detected_boxes[..., 4], cfg)
                # ploted_image = plot_boxes(input_image, detected_boxes, [i for i in range(len(detected_boxes))])
                # cv2.imshow('image', ploted_image)
                # cv2.waitKey(100)
            else:
                # Hardest part! This gives the pose net a new input order and
                # returns the corresponding ids
                print('before mapping: ', previous_ids)
                new_boxes, new_ids = boxes_nms_test(flow_boxes, detected_boxes,
                                                    previous_ids,
                                                    image_resolution)
                print('after mapping: ', new_ids)
                print(flow_boxes[:, 1], detected_boxes[:, 1])
                # ploted_image = plot_boxes(input_image, new_boxes, new_ids)
                # cv2.imshow('image', ploted_image)
                # cv2.waitKey(100)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, new_boxes[..., :4], new_boxes[..., 4], cfg)
        except Exception as e:
            # Any failure above: log it, write and show the raw frame, and
            # skip to the next frame.
            print(e)
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue
        # Pose estimation
        with torch.no_grad():
            # compute output heatmap
            # Reverse the order of the three entries along axis 1.
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            detected_kpts, detected_kpts_scores = get_final_preds(
                cfg,
                output.clone().cpu().numpy(), np.asarray(center),
                np.asarray(scale))
            # Append the keypoint scores along axis 2.
            detected_kpts = np.concatenate(
                (detected_kpts, detected_kpts_scores), 2)
        # TRACK Assign IDs. flow_boxes; detected_boxes, new_ids
        if i > 0:
            pose_similarity_matrix = compute_pairwise_oks(
                flow_kpts, flow_boxes[..., :4], detected_kpts)
            box_similarity_matrix = boxes_similarity(flow_boxes[..., :4],
                                                     detected_boxes[..., :4])
            # Equal-weight blend of pose similarity and box similarity.
            ratio = 0.5
            similarity_matrix = pose_similarity_matrix * ratio + box_similarity_matrix * (
                1 - ratio)
            prev_filter_ids, cur_ids = bipartite_matching_greedy(
                similarity_matrix)
            print('previous frame boxes: ', prev_pose_ids)
            cur_len = len(detected_kpts)
            new_pose_ids = pose_match_ids(prev_pose_ids, prev_filter_ids,
                                          cur_ids, cur_len)
            # detected_kpts = detected_kpts[ [i-1 for i in new_ids],:]
            # detected_kpts_scores = detected_kpts_scores[[i-1 for i in new_ids],:]
            print(prev_filter_ids, cur_ids)
            print('after map: ', new_pose_ids)
        # Prepare the state consumed by the next frame
        pre_image = input_image.copy()
        prev_kpts = detected_kpts
        prev_kpts_scores = detected_kpts_scores
        if i == 0:
            prev_boxes = detected_boxes
            previous_ids = [j for j in range(len(detected_boxes))]
            prev_pose_ids = previous_ids
        else:
            previous_ids = new_ids
            prev_boxes = new_boxes
            prev_pose_ids = new_pose_ids
        if i > 1:
            image = plot_keypoint_track(origin_img, detected_kpts,
                                        detected_kpts_scores,
                                        new_boxes[..., :4], new_pose_ids, 0.1)
        else:
            # NOTE(review): at i == 1 previous_ids already equals new_ids
            # (ids for new_boxes) but is drawn against detected_boxes -
            # confirm the two always line up.
            image = plot_keypoint_track(origin_img, detected_kpts,
                                        detected_kpts_scores,
                                        detected_boxes[..., :4], previous_ids,
                                        0.1)
        if args.display:
            ########### Set the window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
def main():
    """Run detection, flow propagation, pose estimation and id tracking.

    Reads frames from ``args.video_input`` (or camera 0 when ``args.camera``
    is set), resizes them so both sides are multiples of 64, detects people
    with ``yolo_det``, runs the pose model, and from the second frame on
    matches the flow-propagated poses/boxes against the current ones to carry
    ids forward.

    NOTE(review): this revision reads several names that are never assigned
    in this function: ``pre_keypoints``, ``flow_pose_scores``, ``bo``,
    ``tracked_keypoints``, ``tracked_scores``, ``boxs_similarity`` and
    ``human_model``. Unless they are module-level globals the corresponding
    statements raise NameError - verify before relying on this function.
    """
    args = parse_args()
    update_config(cfg, args)
    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000
    # NOTE(review): ret_val is never checked, here or inside the loop.
    ret_val, input_image = cam.read()
    # Keep width and height multiples of 64
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int(
        (input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
    print(resize_W, resize_H)
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))
    #### load optical flow model
    flow_model = load_model()
    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()
    # Placeholders, overwritten once the loop produces real values.
    flow_boxs = 0
    flow_kpts = 0
    previous_ids = 0
    # NOTE(review): debugging hook executed once before the frame loop.
    pdb()
    # The first frame was consumed above, hence video_length - 1 iterations.
    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))
        if i > 0:
            try:
                flow_result = flow_net(pre_image, input_image, flow_model)
                # NOTE(review): pre_keypoints / flow_pose_scores are not
                # assigned in this function; a NameError here is caught
                # below and the frame is skipped.
                flow_boxs, flow_kpts = flow_propagation(
                    pre_keypoints, flow_result)
                flow_kpts = np.concatenate((flow_kpts, flow_pose_scores), -1)
            except Exception as e:
                print(e)
                continue
        pre_image = input_image
        try:
            # boxes_threthold is 0.6
            bboxs, scores = yolo_det(
                input_image, human_model)  # bbox is coordinate location
            # First frame
            if i == 0:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
                # Initial ids, and scores map
                previous_ids = [i for i in range(len(bboxs))]
                # id_scores_map = {}
                # for i in range(len(bboxs)): id_scores_map.update({previous_ids[i]: scores[i]})
            else:
                # Box-confidence NMS between this frame and the previous one
                # new_boxs, new_ids = boxes_nms(flow_boxs, bboxs, previous_ids)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
        except Exception as e:
            # Any failure above: log it, write and show the raw frame, and
            # skip to the next frame.
            print(e)
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue
        with torch.no_grad():
            # compute output heatmap
            # Reverse the order of the three entries along axis 1.
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))
        # Keypoint coordinates with their scores appended along axis 2.
        keypoints = np.concatenate((preds, maxvals), 2)
        # Box confidence of the current frame, used as the confidence of the
        # next frame's flow boxes
        # flow_bbox_scores = scores.copy()
        # if i != 1:
        #     preds = (preds + flow_kpts) / 2
        # shift-kpts, shift-boxes, cur_kpts ------> TRACK
        if i > 0:
            kps_b = keypoints.copy()
            box_b = bboxs[:preds.shape[0]]
            kps_a = flow_kpts  # (N, 17, 3)
            box_a = flow_boxs
            pose_similarity_matrix = compute_pairwise_oks(kps_a, box_a, kps_b)
            # NOTE(review): spelled boxs_similarity here, boxes_similarity in
            # the other functions of this file - confirm the name exists.
            box_similarity_matrix = boxs_similarity(box_a, box_b)
            # Equal-weight blend of pose similarity and box similarity.
            ratio = 0.5
            similarity_matrix = pose_similarity_matrix * ratio + box_similarity_matrix * (
                1 - ratio)
            prev_filter_ids, cur_ids = bipartite_matching_greedy(
                similarity_matrix)
            print('previous frame boxes: ', previous_ids)
            print(prev_filter_ids, cur_ids)
            cur_len = len(box_b) + len(box_a) - len(cur_ids)
            # -1 marks entries that have not been given an id yet.
            cur_maps = -np.ones(shape=(cur_len, ))
            new_boxes = []
            new_kpts = []
            for pos, num in enumerate(cur_ids):
                cur_maps[pos] = previous_ids[prev_filter_ids[pos]]
                # NOTE(review): `bo` is not defined anywhere in view.
                new_boxes.append(bo)
            # NOTE(review): prev_max_id is read here before any assignment
            # in this function.
            prev_max_id = max(max(previous_ids), prev_max_id)
            # NOTE(review): this loop rebinds `i`, the frame index that the
            # `if i == 0` / `if i > 1` checks below still read.
            for i in range(cur_len):
                if cur_maps[i] == -1.:
                    prev_max_id += 1
                    cur_maps[i] = prev_max_id
            # NOTE(review): uint8 wraps ids above 255.
            previous_ids = cur_maps.astype(np.uint8).tolist()
            print('after map: ', previous_ids)
        # Collect the keypoints / ids handed to flownet for the next frame
        if i == 0:
            pre_flow_keypoints = keypoints
            pre_flow_pkt_scores = scores.copy()
        else:
            # According to the mapping result
            # NOTE(review): tracked_keypoints / tracked_scores are not
            # assigned in this function.
            pre_flow_keypoints = tracked_keypoints
            pre_flow_pkt_scores = tracked_scores
        if i > 1:
            image = plot_keypoint_track(origin_img, preds, maxvals, box_b,
                                        previous_ids, 0.1)
        if args.display and i > 1:
            ########### Set the window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
def main():
    """Track human poses through a video and write the annotated result.

    Reads frames from ``args.video_input`` (or camera 0 when ``args.camera``
    is set) and resizes them so both sides are multiples of 64. For each
    frame it detects people with ``yolo_det``; from the second frame on it
    also propagates the previous keypoints/boxes with the flow model and
    merges both box sets with ``boxes_nms_test``. It then runs the pose model
    and matches the flow-propagated poses against the current ones to carry
    track ids forward. Annotated frames (second frame onward) are written to
    ``args.video_output`` and shown when ``args.display`` is set.

    Raises:
        RuntimeError: if the first frame cannot be read from the input.
    """
    previous_ids = 0
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    out = None
    try:
        ret_val, input_image = cam.read()
        if not ret_val:
            raise RuntimeError('could not read the first frame from the input')

        # Both sides are rounded down to multiples of 64.
        resize_W = int(input_image.shape[1] / 64) * 64
        resize_H = int(
            (input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
        input_image = cv2.resize(input_image, (resize_W, resize_H))
        image_resolution = (resize_W, resize_H)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        input_fps = cam.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                              (input_image.shape[1], input_image.shape[0]))

        #### load pose-hrnet MODEL
        pose_model = model_load(cfg)
        pose_model.cuda()

        #### load optical flow model
        flow_model = load_model()

        prev_max_id = 0
        # The first frame was consumed above, hence video_length - 1 reads.
        for i in tqdm(range(video_length - 1)):
            # Timing checkpoint; the value is currently unused.
            x0 = ckpt_time()
            ret_val, input_image = cam.read()
            if not ret_val:
                # The reported frame count can exceed the frames actually
                # readable; stop cleanly instead of resizing a None frame.
                break
            input_image = cv2.resize(input_image, (resize_W, resize_H))

            try:
                # NOTE(review): human_model is not assigned in this function
                # - presumably a module-level global; confirm.
                bboxs, scores = yolo_det(input_image, human_model)
                # Append the score as a fifth column of each box.
                bboxs = np.concatenate((bboxs, scores.transpose(1, 0)), -1)

                # Add the flownet module
                if i > 0:
                    flow_result = flow_net(pre_image, input_image, flow_model)
                    flow_boxes, flow_kpts = flow_propagation(
                        prev_kpts, flow_result)
                    flow_boxes = np.concatenate(
                        (flow_boxes, np.expand_dims(prev_boxes[..., 4], -1)),
                        -1)
                    flow_kpts = np.concatenate((flow_kpts, prev_kpts_scores),
                                               -1)
                    # plot_boxes(input_image.copy(), flow_boxes, [i for i in range(len(flow_boxes))], '{}_flow.png'.format(1000+i))
                    # plot_boxes(input_image.copy(), bboxs, [i for i in range(len(bboxs))], '{}_detected.png'.format(1000+i))
                    bboxs = boxes_nms_test(flow_boxes, bboxs, image_resolution)
                    # plot_boxes(input_image.copy(), bboxs, [i for i in range(len(bboxs))], 'nms_{}.png'.format(100+i))

                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs[..., :4], bboxs[..., 4], cfg)
            except Exception as e:
                print(e)
                # NOTE(review): debugging hook, runs on every failed frame.
                pdb()
                continue

            try:
                with torch.no_grad():
                    # compute output heatmap
                    # Reverse the order of the three entries along axis 1.
                    inputs = inputs[:, [2, 1, 0]]
                    output = pose_model(inputs.cuda())
                    # compute coordinate
                    preds, maxvals = get_final_preds(
                        cfg,
                        output.clone().cpu().numpy(), np.asarray(center),
                        np.asarray(scale))
            except Exception as e:
                print(e)
                continue

            # Keypoint coordinates with their scores appended along axis 2.
            kps_b = np.concatenate((preds, maxvals), 2)
            box_b = bboxs[:preds.shape[0]]

            # previous_ids stays the integer 0 until the first frame that
            # yields poses; then ids start as 0..len(preds)-1.
            if previous_ids == 0:
                previous_ids = [j for j in range(len(preds))]

            if i > 0:
                # kps_a comes from the previous frame (flow-propagated);
                # kps_b is what hrnet detected in the current frame.
                kps_a = flow_kpts
                box_a = flow_boxes

                pose_similarity_matrix = compute_pairwise_oks(
                    kps_a, box_a, kps_b)
                box_similarity_matrix = boxes_similarity(box_a, box_b)
                # pose similarity ratio
                ratio = 0.5
                similarity_matrix = (pose_similarity_matrix * ratio +
                                     box_similarity_matrix * (1 - ratio))

                # prev_filter_ids: indices into the previous frame's id list
                # that survived the matching
                prev_filter_ids, cur_ids = bipartite_matching_greedy(
                    similarity_matrix)

                print('previous frame boxes: ', previous_ids)
                print(prev_filter_ids, cur_ids)

                cur_len = len(box_b)
                # -1 marks boxes that have not been given an id yet.
                cur_maps = -np.ones(shape=(cur_len, ))
                for pos, num in enumerate(cur_ids):
                    cur_maps[num] = previous_ids[prev_filter_ids[pos]]

                # Folding prev_max_id into the list keeps max() defined even
                # when previous_ids is empty.
                prev_max_id = max([prev_max_id, *previous_ids])
                for j in range(cur_len):
                    if cur_maps[j] == -1.:
                        prev_max_id += 1
                        cur_maps[j] = prev_max_id

                # Id list consumed as "previous" by the next iteration.
                # int64 instead of uint8: uint8 silently wraps ids above 255.
                previous_ids = cur_maps.astype(np.int64).tolist()
                print('after map: ', previous_ids)

            # State consumed as "previous frame" by the next iteration
            prev_kpts = kps_b
            prev_kpts_scores = maxvals
            pre_image = input_image
            prev_boxes = bboxs

            if i > 0:
                # A copy of the boxes is passed, as before, so the plotting
                # helper cannot touch the array stored in prev_boxes.
                image = plot_keypoint_track(origin_img, preds, maxvals,
                                            box_b.copy(), previous_ids, 0.1)
                out.write(image)

            if args.display and i > 0:
                ########### Set the window position
                winname = 'image'
                cv2.namedWindow(winname)  # Create a named window
                cv2.moveWindow(winname, 1000, 850)  # Move it to (1000, 850)
                cv2.imshow(winname, image)
                cv2.waitKey(100)
    finally:
        # Release the capture and finalize the output file on every exit path.
        cam.release()
        if out is not None:
            out.release()
        if args.display:
            cv2.destroyAllWindows()