def get_prediction(self, orig_img, boxes, scores, single_height, output_l):
    # Bail out before touching boxes: the original checked for None only after
    # calling boxes.size(0), which would have raised first.
    if boxes is None:
        return False, None
    inp = im_to_torch(orig_img)
    inps = torch.zeros(boxes.size(0), 3, args.inputResH, args.inputResW)
    pt1 = torch.zeros(boxes.size(0), 2)
    pt2 = torch.zeros(boxes.size(0), 2)
    inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
    inps = Variable(inps.cuda())
    hm = self.pose_model(inps)
    preds_hm, preds_img, preds_scores = getPrediction(
        hm.cpu(), pt1, pt2, args.inputResH, args.inputResW, args.outputResH, args.outputResW)
    # result (after filtering by pose NMS): bbox, bbox_score, roi, keypoints, kp_score (for both camera views)
    box, box_s, roi, kp, kp_s = pose_nms(
        boxes, scores, preds_img, preds_scores, single_height, orig_img, output_l)
    return True, (box, box_s, roi, kp, kp_s)
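# The mapping from heatmaps back to image coordinates is the core of the step
# above. The helper below is a minimal, self-contained sketch of what
# getPrediction does, assuming pt1/pt2 are the top-left/bottom-right corners of
# each person crop in original-image coordinates. It is illustrative only, not
# the library routine; decode_heatmaps_sketch is not a repo function.
import torch

def decode_heatmaps_sketch(hm, pt1, pt2):
    # hm:  (N, K, H, W) joint heatmaps, one row per detected person
    # pt1: (N, 2) crop top-left corners; pt2: (N, 2) crop bottom-right corners
    n, k, h, w = hm.shape
    flat = hm.view(n, k, -1)
    scores, idx = flat.max(dim=2)                  # peak confidence per joint
    xs = (idx % w).float()
    ys = (idx // w).float()
    # Rescale from heatmap resolution to the crop box, then shift by pt1.
    box_w = (pt2[:, 0] - pt1[:, 0]).unsqueeze(1)
    box_h = (pt2[:, 1] - pt1[:, 1]).unsqueeze(1)
    px = pt1[:, 0].unsqueeze(1) + xs / (w - 1) * box_w
    py = pt1[:, 1].unsqueeze(1) + ys / (h - 1) * box_h
    return torch.stack([px, py], dim=2), scores    # (N, K, 2) coords, (N, K) scores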
dets[:, 1:5] /= scaling_factor
for j in range(dets.shape[0]):
    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
boxes = dets[:, 1:5].cpu()
scores = dets[:, 5:6].cpu()
ckpt_time, detNMS_time = getTime(ckpt_time)
runtime_profile['dn'].append(detNMS_time)

# Pose Estimation
inps = torch.zeros(boxes.size(0), 3, opt.inputResH, opt.inputResW)
pt1 = torch.zeros(boxes.size(0), 2)
pt2 = torch.zeros(boxes.size(0), 2)
inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
inps = Variable(inps.cuda())
hm = pose_model(inps)
ckpt_time, pose_time = getTime(ckpt_time)
runtime_profile['pt'].append(pose_time)

writer.save(boxes, scores, hm.cpu(), pt1, pt2, orig_img, im_name=str(i) + '.jpg')
ckpt_time, post_time = getTime(ckpt_time)
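# The scaling/clamping above undoes the detector's letterbox preprocessing.
# Below is a self-contained NumPy sketch of the same math, assuming
# letterbox_image scales by min(inp_dim/w, inp_dim/h) and pads the remainder
# equally on both sides; undo_letterbox_sketch is illustrative, not a repo
# function.
import numpy as np

def undo_letterbox_sketch(boxes, img_w, img_h, inp_dim):
    # boxes: (N, 4) array of (x1, y1, x2, y2) in letterboxed input coordinates
    boxes = np.asarray(boxes, dtype=np.float32).copy()
    scale = min(inp_dim / img_w, inp_dim / img_h)
    pad_x = (inp_dim - scale * img_w) / 2          # horizontal padding per side
    pad_y = (inp_dim - scale * img_h) / 2          # vertical padding per side
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_x) / scale
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_y) / scale
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, img_w)   # clamp to image bounds
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, img_h)
    return boxes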
def detect_main(im_name, orig_img, det_model, pose_model, opt):
    args = opt
    mode = args.mode
    inp_dim = int(opt.inp_dim)
    dim = orig_img.shape[1], orig_img.shape[0]  # (width, height)
    img_ = letterbox_image(orig_img, (inp_dim, inp_dim))
    img_ = img_[:, :, ::-1].transpose((2, 0, 1)).copy()  # BGR -> RGB, HWC -> CHW
    img = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    img = [img]
    orig_img = [orig_img]
    im_name = [im_name]
    im_dim_list = [dim]
    # img.append(img_k)
    # orig_img.append(orig_img_k)
    # im_name.append(im_name_k)
    # im_dim_list.append(im_dim_list_k)

    with torch.no_grad():
        # Human Detection
        img = torch.cat(img)
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
        # im_dim_list_ = im_dim_list

    # DetectionLoader
    det_inp_dim = int(det_model.net_info['height'])
    assert det_inp_dim % 32 == 0
    assert det_inp_dim > 32
    # res_n = 0
    with torch.no_grad():
        img = img.cuda()
        prediction = det_model(img, CUDA=True)  # a tensor
        # Furniture boxes (COCO classes: 56 chair, 57 couch/sofa, 59 bed) are
        # kept so that people sitting or lying on furniture are not flagged.
        boxes_chair = get_box(prediction, det_inp_dim, im_dim_list, opt.confidence, opt.num_classes, 56)
        boxes_sofa = get_box(prediction, det_inp_dim, im_dim_list, opt.confidence, opt.num_classes, 57)
        boxes_bed = get_box(prediction, det_inp_dim, im_dim_list, opt.confidence, opt.num_classes, 59)
        dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, 0,
                                     nms=True, nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            # cv2.imwrite('err_result/no_person/' + im_name[0][0:-4] + '_re.jpg', orig_img[0])
            return []
        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

        # coordinate transfer: map boxes from the letterboxed detector input
        # back to original-image coordinates
        dets[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor
        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]

    orig_img = orig_img[0]
    im_name = im_name[0]
    boxes_k = boxes[dets[:, 0] == 0]
    if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
        boxes = None
        scores = None
        inps = None
        pt1 = None
        pt2 = None
    else:
        inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes_k.size(0), 2)
        pt2 = torch.zeros(boxes_k.size(0), 2)
        boxes = boxes_k
        scores = scores[dets[:, 0] == 0]
    # orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2

    # DetectionProcess
    with torch.no_grad():
        if boxes is None or boxes.nelement() == 0:
            pass
        else:
            inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
    # self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2))

    batchSize = args.posebatch
    # fall_res_all = []
    # Initialise results before the loop so the return below cannot hit an
    # undefined name when no person is detected.
    fall_res = []
    keypoint_res = []
    for i in range(1):
        with torch.no_grad():
            if boxes is None or boxes.nelement() == 0:
                # writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1])
                # res_n = 0
                continue

            # Pose Estimation, run in chunks of batchSize crops
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            hm = hm.cpu()
            # writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1])

            # fall_res.append(im_name.split('/')[-1])
            if opt.matching:
                preds = getMultiPeakPrediction(
                    hm, pt1.numpy(), pt2.numpy(),
                    opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
                result = matching(boxes, scores.numpy(), preds)
            else:
                preds_hm, preds_img, preds_scores = getPrediction(
                    hm, pt1, pt2, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
                result = pose_nms(boxes, scores, preds_img, preds_scores)
            result = {'imgname': im_name, 'result': result}
            # img = orig_img
            img = vis_frame(orig_img, result)

            for human in result['result']:
                keypoint = human['keypoints']
                kp_scores = human['kp_score']
                keypoint = keypoint.numpy()
                xmax = max(keypoint[:, 0])
                xmin = min(keypoint[:, 0])
                ymax = max(keypoint[:, 1])
                ymin = min(keypoint[:, 1])
                box_hm = [xmin, ymin, xmax, ymax]

                # Skip people with too few confidently detected keypoints.
                kp_num = 0
                for n in range(len(kp_scores)):
                    if kp_scores[n] > 0.05:
                        kp_num += 1
                if kp_num < 10:
                    # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                    fall_res.append([False, xmin, ymin, xmax, ymax])
                    # print("kp_num:" + str(kp_num))
                    continue

                # Suppress alarms for people overlapping furniture.
                overlap = []
                for box in boxes_chair:
                    overlap.append(compute_overlap(box_hm, box))
                for box in boxes_sofa:
                    overlap.append(compute_overlap(box_hm, box))
                for box in boxes_bed:
                    overlap.append(compute_overlap(box_hm, box))
                if len(overlap) > 0 and max(overlap) >= 0.6:
                    # res_n = 0
                    fall_res.append([False, xmin, ymin, xmax, ymax])
                    # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                    # print("overlap:" + str(overlap))
                    continue

                w = xmax - xmin
                h = ymax - ymin
                ratio = w / h
                # distance = abs((keypoint[15][1] + keypoint[16][1]) / 2 - (keypoint[11][1] + keypoint[12][1]) / 2)
                # Head centre from eyes (1, 2) and ears (3, 4); the original
                # double-counted keypoint[2] while still dividing by 4.
                xhead = (keypoint[1][0] + keypoint[2][0] + keypoint[3][0] + keypoint[4][0]) / 4
                yhead = (keypoint[1][1] + keypoint[2][1] + keypoint[3][1] + keypoint[4][1]) / 4
                xfeet = (keypoint[15][0] + keypoint[16][0]) / 2
                yfeet = (keypoint[15][1] + keypoint[16][1]) / 2
                d_ear = (abs(keypoint[3][0] - keypoint[4][0]) ** 2 +
                         abs(keypoint[3][1] - keypoint[4][1]) ** 2) ** 0.5
                r = (w ** 2 + h ** 2) ** 0.5 / d_ear
                # Small box-diagonal-to-ear-distance ratio: the person fills the
                # frame (too close to the camera), so skip to avoid false alarms.
                if kp_scores[3] > 0.05 and kp_scores[4] > 0.05 and r < 4:
                    fall_res.append([False, xmin, ymin, xmax, ymax])
                    # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                    # print("r<4")
                    continue
                # xhead_foot = abs(xfeet - xhead)
                # yhead_foot = abs(yfeet - yhead)
                # dhead_foot = (xhead_foot ** 2 + yhead_foot ** 2) ** 0.5
                # ratio = yhead_foot / dhead_foot
                if min(kp_scores[3], kp_scores[4], kp_scores[15], kp_scores[16]) > 0.05 \
                        and yfeet < (keypoint[3][1] + keypoint[4][1]) / 2:
                    # Feet above the ears: treat as fallen.
                    # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                    # font = cv2.FONT_HERSHEY_SIMPLEX
                    # cv2.putText(img, 'Warning!Fall!', (int(xmin + 10), int(ymax - 10)), font, 1, (0, 255, 0), 2)
                    fall_res.append([True, xmin, ymin, xmax, ymax])
                    keypoint_res.append(keypoint)
                    # res_n = 2
                elif ratio >= 1.0:
                    # Bounding box wider than tall: likely lying down.
                    # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
                    # font = cv2.FONT_HERSHEY_SIMPLEX
                    # cv2.putText(img, 'Warning!Fall', (int(xmin + 10), int(ymax - 10)), font, 1, (0, 0, 255), 2)
                    fall_res.append([True, xmin, ymin, xmax, ymax])
                    keypoint_res.append(keypoint)
                    # res_n = 1
                else:
                    # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                    # print("normal")
                    fall_res.append([False, xmin, ymin, xmax, ymax])
                    # res_n = 0
            # cv2.imwrite(os.path.join(opt.outputpath, 'vis', im_name), img)
            '''
            for box in boxes_chair:
                cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 255, 0), 2)
            for box in boxes_sofa:
                cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 255), 2)
            for box in boxes_bed:
                cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 255), 2)
            cv2.imwrite('err_result/false/' + im_name[0:-4] + '_re.jpg', img)
            '''
    return keypoint_res
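# Hypothetical driver for detect_main, shown as a minimal sketch: the image
# paths and the already-constructed det_model / pose_model / opt objects are
# assumptions, and run_on_images is not part of the repo.
import cv2

def run_on_images(image_paths, det_model, pose_model, opt):
    results = {}
    for path in image_paths:
        frame = cv2.imread(path)   # BGR frame, as detect_main expects
        if frame is None:
            continue
        # detect_main returns the keypoint arrays of every person judged to
        # have fallen (an empty list when nobody is detected or nobody fell).
        results[path] = detect_main(path, frame, det_model, pose_model, opt)
    return results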
dets[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
dets[:, [2, 4]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
dets[:, 1:5] /= scaling_factor
for j in range(dets.shape[0]):
    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
boxes = dets[:, 1:5].cpu()
scores = dets[:, 5:6].cpu()
ckpt_time, detNMS_time = getTime(ckpt_time)
runtime_profile['dn'].append(detNMS_time)

# Pose Estimation
inps, pt1, pt2 = crop_from_dets(inp[0], boxes)
inps = Variable(inps.cuda())
hm = pose_model(inps)
ckpt_time, pose_time = getTime(ckpt_time)
runtime_profile['pt'].append(pose_time)

writer.save(boxes, scores, hm.cpu().data, pt1, pt2,
            np.array(orig_img[0], dtype=np.uint8), im_name[0].split('/')[-1])
# writer.save(boxes, scores, hm.cpu().data, pt1, pt2, orig_img[0], im_name[0].split('/')[-1])
ckpt_time, post_time = getTime(ckpt_time)
runtime_profile['pn'].append(post_time)

# TQDM
im_names_desc.set_description(
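# crop_from_dets is used above without being shown. The helper below is a
# hypothetical, simplified stand-in illustrating the idea: cut one detection
# box out of a CHW image tensor, resize it to the pose network's input size,
# and return the crop corners (pt1, pt2) needed to map keypoints back. The
# real helper also pads and rescales boxes, so treat this as a sketch only.
import torch
import torch.nn.functional as F

def crop_box_sketch(img, box, out_h, out_w):
    # img: (3, H, W) float tensor in [0, 1]; box: (x1, y1, x2, y2)
    x1, y1, x2, y2 = [int(v) for v in box]
    crop = img[:, y1:y2, x1:x2].unsqueeze(0)   # (1, 3, h, w)
    crop = F.interpolate(crop, size=(out_h, out_w),
                         mode='bilinear', align_corners=False)
    return crop.squeeze(0), (x1, y1), (x2, y2)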