def load(self, img, orig_img, im_dim_list):
    with torch.no_grad():
        # Human Detection
        img = img.cuda(torchCuda)
        prediction = self.det_model(img, CUDA=True)
        # NMS process
        dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                     nms=True, nms_conf=opt.nms_thesh)
        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

        # coordinate transfer
        dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor

        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]

        # for k in range(len(orig_img)):
        k = 0
        boxes_k = boxes[dets[:, 0] == k]
        inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes_k.size(0), 2)
        pt2 = torch.zeros(boxes_k.size(0), 2)
        return (orig_img[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2)
def update(self):
    # keep looping the whole dataset
    while True:
        img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
        with self.dataloder.Q.mutex:
            self.dataloder.Q.queue.clear()
        with torch.no_grad():
            # Human Detection
            img = img.cuda()
            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                for k in range(len(orig_img)):
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                continue
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor

            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k],
                            inps, pt1, pt2))
def get_box(prediction, det_inp_dim, im_dim_list, confidence, num_classes, class_num):
    dets = dynamic_write_results(prediction, confidence, num_classes, class_num,
                                 nms=True, nms_conf=0.4)
    if isinstance(dets, int) or dets.shape[0] == 0:
        return []
    dets = dets.cpu()
    im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
    scaling_factor = torch.min(det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

    # coordinate transfer
    dets[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    dets[:, [2, 4]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    dets[:, 1:5] /= scaling_factor

    for j in range(dets.shape[0]):
        dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
        dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])

    boxes = dets[:, 1:5]
    boxes = boxes.numpy().tolist()
    scores = dets[:, 5:6]
    scores = scores.numpy().tolist()
    # print(scores)

    boxes_out = []
    for i in range(len(boxes)):
        if scores[i][0] >= 0.1:
            boxes_out.append(boxes[i])
    return boxes_out
def NMS_process(self, prediction, frame_id, fvis_0, fvis_1, im_dim_list):
    dets = dynamic_write_results(prediction, args.confidence, args.num_classes,
                                 nms=True, nms_conf=args.nms_thesh)
    if isinstance(dets, int) or dets.shape[0] == 0:
        return False, None, None

    im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
    scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

    # coordinate transfer
    dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    dets[:, 1:5] /= scaling_factor

    for j in range(dets.shape[0]):
        dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
        dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])

    boxes = dets[:, 1:5].cpu()
    scores = dets[:, 5:6].cpu()
    return True, boxes, scores
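# Every snippet in this section repeats the same "coordinate transfer": the detector sees a
# square letterboxed image of side det_inp_dim, so each box has to be shifted by the padding
# offset, divided by the resize factor, and clamped to the original frame. A minimal
# standalone sketch of that mapping, for reference only; the function name and the NumPy
# types are assumptions and not part of the original loaders:
import numpy as np

def letterbox_boxes_to_original(boxes, det_inp_dim, orig_w, orig_h):
    """Map (x1, y1, x2, y2) boxes from letterboxed detector space back to image space."""
    boxes = np.asarray(boxes, dtype=np.float32).copy()
    scale = min(det_inp_dim / orig_w, det_inp_dim / orig_h)   # resize factor used by letterboxing
    pad_x = (det_inp_dim - scale * orig_w) / 2                # horizontal padding on each side
    pad_y = (det_inp_dim - scale * orig_h) / 2                # vertical padding on each side
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_x) / scale     # undo padding, then undo resize
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_y) / scale
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0.0, orig_w)     # clamp to the original frame
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0.0, orig_h)
    return boxes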
def human_detect(self):
    img, orig_img, im_dim_list = self.img, self.orig_img, self.im_dim_list
    if img is None:
        self.human_detect_result = (None, None, None, None, None, None)
        # print('section 1')
        return
    with torch.no_grad():
        # Human Detection
        img = img.cuda()
        prediction = self.det_model(img, CUDA=True)
        # NMS process
        dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                     nms=True, nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            # no dets
            self.human_detect_result = (None, orig_img[0], None, None, None, None)
            # print('section 2')
            # print(self.human_detect_result[2])
            return
        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

        # coordinate transfer
        dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor

        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]
        if isinstance(boxes, int) or boxes.shape[0] == 0:
            self.human_detect_result = (None, orig_img[0], boxes, scores, None, None)
            # print('section 3')
            return
        inps = torch.zeros(boxes.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes.size(0), 2)
        pt2 = torch.zeros(boxes.size(0), 2)
        inp = im_to_torch(cv2.cvtColor(orig_img[0], cv2.COLOR_BGR2RGB))
        inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
        self.human_detect_result = (inps, orig_img[0], boxes, scores, pt1, pt2)
        # print('section 4')
        return
def detect_image(self, im_path):
    im, ori_im, im_name, im_dim_list = self.dataloder.getitem_yolo(im_path)
    with torch.no_grad():
        im = im.cuda()
        prediction = self.det_model(im, CUDA=True)
        # NMS process
        dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                     nms=True, nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            return (ori_im[0], im_name[0], None, None, None, None, None)
        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

        # coordinate transfer
        dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor

        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]

        if boxes.shape[0] > 1:
            # keep only the highest-scoring detection; compute the index once so that
            # boxes, scores and dets all refer to the same detection
            best = scores.argmax()
            boxes = boxes[best].unsqueeze(0)
            scores = scores[best].unsqueeze(0)
            dets = dets[best].unsqueeze(0)

        # len(ori_im) == 1
        for k in range(len(ori_im)):
            boxes_k = boxes[dets[:, 0] == k]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                return (ori_im[k], im_name[k], None, None, None, None, None)
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)
            return (ori_im[k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2)
def update(self):
    # keep looping the whole dataset
    for i in range(self.num_batches):
        img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
        # print('___________show the dataloader original information_________')
        # print('image name', im_name)
        # print('im_dim_list', im_dim_list)
        if img is None:
            self.Q.put((None, None, None, None, None, None, None))
            return
        with torch.no_grad():
            # Human Detection
            img = img.cuda()
            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                for k in range(len(orig_img)):
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                continue
            dets = dets.cpu()
            # index_select: first argument is the tensor to index, second is the dimension
            # (0 for rows, 1 for columns), third holds the indices to select.
            # Keep only the (w, h, w, h) rows of im_dim_list for images that have detections.
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            # each element of scaling_factor is the resize factor that was used when the
            # corresponding image was letterboxed to 416
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor

            for j in range(dets.shape[0]):
                # clamp each box to the [0, w] / [0, h] range of its image and write the
                # result back into dets
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k],
                            inps, pt1, pt2))
def update(self):
    time1 = time.time()
    _, frame = self.stream.read()
    # frame = cv2.resize(frame, (frame.shape[1]//2, frame.shape[0]//2))
    # TODO TESTING
    # frame[:, :200, :] = 0
    # frame[:, 450:, :] = 0
    img_k, self.orig_img, im_dim_list_k = prep_frame(frame, self.inp_dim)
    img = [img_k]
    im_name = ["im_name"]
    im_dim_list = [im_dim_list_k]
    img = torch.cat(img)
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
    time2 = time.time()

    with torch.no_grad():
        ### detector #########################
        # Human Detection
        img = img.cuda()
        prediction = self.det_model(img, CUDA=True)
        # NMS process
        dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                     nms=True, nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            self.visualize2dnoperson()
            return None
        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

        # coordinate transfer
        dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor
        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]
        boxes_k = boxes[dets[:, 0] == 0]
        if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
            self.visualize2dnoperson()
            raise NotImplementedError
        inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes_k.size(0), 2)
        pt2 = torch.zeros(boxes_k.size(0), 2)
        time3 = time.time()

        ### processor #########################
        inp = im_to_torch(cv2.cvtColor(self.orig_img, cv2.COLOR_BGR2RGB))
        inps, pt1, pt2 = self.crop_from_dets(inp, boxes, inps, pt1, pt2)

        ### generator #########################
        self.orig_img = np.array(self.orig_img, dtype=np.uint8)
        # location prediction (n, kp, 2) | score prediction (n, kp, 1)
        datalen = inps.size(0)
        batchSize = 20  # args.posebatch
        leftover = 0
        if datalen % batchSize:
            leftover = 1
        num_batches = datalen // batchSize + leftover
        hm = []
        time4 = time.time()
        for j in range(num_batches):
            inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
            hm_j = self.pose_model(inps_j)
            hm.append(hm_j)
        hm = torch.cat(hm)
        hm = hm.cpu().data
        preds_hm, preds_img, preds_scores = getPrediction(
            hm, pt1, pt2, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
        result = pose_nms(boxes, scores, preds_img, preds_scores)
        time5 = time.time()
        if not result:  # No people
            self.visualize2dnoperson()
            return None
        else:
            self.kpt = max(result,
                           key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']),
                           )['keypoints']
            self.visualize2d()
            # report the processing time before returning
            time6 = time.time()
            print("process time : {} ".format(time6 - time5))
            return self.kpt
def get_pose(self, img_names): if len(img_names) > 1: start_lc = 4000 start_rc = 4000 now_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()) print('========START-Ten========') final_result = [] vis_images = [] height_difference = [] for img_index in range(len(img_names)): print('--------------------') img_name = img_names[img_index] try: img, orig_img, im_name, im_dim_list = [], [], [], [] inp_dim = int(self.args.inp_dim) im_name_k = img_name img_k, orig_img_k, im_dim_list_k = prep_image( im_name_k, inp_dim) img.append(img_k) orig_img.append(orig_img_k) im_name.append(im_name_k) im_dim_list.append(im_dim_list_k) except: print('index-{}: image have problem'.format(img_index)) final_result.append((None, None)) continue with torch.no_grad(): img = torch.cat(img) im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) img = img.cuda() prediction = self.det_model(img, CUDA=True) dets = dynamic_write_results(prediction, self.args.confidence, self.args.num_classes, nms=True, nms_conf=self.args.nms_thesh) if isinstance(dets, int) or dets.shape[0] == 0: print('index-{}: No person detected'.format(img_index)) final_result.append((None, None)) height_difference.append(None) continue dets = dets.cpu() im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1) dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 dets[:, 1:5] /= scaling_factor for j in range(dets.shape[0]): dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0]) dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) boxes = dets[:, 1:5] scores = dets[:, 5:6] k = 0 boxes_k = boxes[dets[:, 0] == k] inps = torch.zeros(boxes_k.size(0), 3, self.args.inputResH, self.args.inputResW) pt1 = torch.zeros(boxes_k.size(0), 2) pt2 = torch.zeros(boxes_k.size(0), 2) orig_img, im_name, boxes, scores, inps, pt1, pt2 = orig_img[ k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2 inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)) inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2) batchSize = self.args.posebatch datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min( (j + 1) * batchSize, datalen)].cuda() hm_j = self.pose_model(inps_j) hm.append(hm_j) hm = torch.cat(hm) hm_data = hm.cpu() orig_img = np.array(orig_img, dtype=np.uint8) im_name = im_name.split('/')[-1] preds_hm, preds_img, preds_scores = getPrediction( hm_data, pt1, pt2, self.args.inputResH, self.args.inputResW, self.args.outputResH, self.args.outputResW) result = pose_nms(boxes, scores, preds_img, preds_scores) result = {'imgname': im_name, 'result': result} img = vis_frame(orig_img, result) vis_images.append(img) outpur_dir = os.path.join(self.args.outputpath, 'vis') outpur_dir_raw = os.path.join(self.args.outputpath, 'raw') if not os.path.exists(outpur_dir): os.makedirs(outpur_dir) if not os.path.exists(outpur_dir_raw): os.makedirs(outpur_dir_raw) width = img.shape[1] keypoints = [res['keypoints'][0] for res in result['result']] distance = [xy[0] - width / 2 for xy in keypoints] distance = torch.tensor([torch.abs(m) for m in distance]) indice = torch.argsort(distance)[0] pose_result = result['result'][indice]['keypoints'] # left_arm = pose_result[[6, 8, 10]].numpy() # 
right_arm = pose_result[[5, 7, 9]].numpy() # ['Nose', 'LEye', 'REye', 'LEar', 'REar', 'LShoulder', 'RShoulder', 'LElbow', 'RElbow', 'LWrist', 'RWrist', 'LHip', # 'RHip', 'LKnee', 'RKnee', 'LAnkle', 'RAnkle'] left_arm = pose_result[[10]].numpy().astype(int) right_arm = pose_result[[9]].numpy().astype(int) left_arm_c_y = np.mean(left_arm, axis=0)[1] right_arm_c_y = np.mean(right_arm, axis=0)[1] # left_arm_c = tuple(np.mean(left_arm, axis=0).astype(int)) # right_arm_c = tuple(np.mean(right_arm, axis=0).astype(int)) left_arm_c = tuple(left_arm[0]) right_arm_c = tuple(right_arm[0]) hd = np.abs(left_arm_c_y - right_arm_c_y) height_difference.append(hd) cv2.circle(img, left_arm_c, 10, (0, 255, 0), -1, 8) cv2.circle(img, right_arm_c, 10, (0, 255, 0), -1, 8) log__vis_name = now_time + '-' + im_name cv2.imwrite(os.path.join(outpur_dir_raw, log__vis_name), orig_img) cv2.imwrite(os.path.join(outpur_dir, log__vis_name), img) if start_lc == 4000 and start_rc == 4000: start_lc = left_arm_c_y start_rc = right_arm_c_y left_move = 0 right_move = 0 else: left_move = left_arm_c_y - start_lc right_move = right_arm_c_y - start_rc print('index-{}--{}: left_c {:0f},right_c {:0f}'.format( img_index, im_name, left_arm_c_y, right_arm_c_y)) print('index-{}--{}: start_lc {:0f},start_rc {:0f}'.format( img_index, im_name, start_lc, start_rc)) print('index-{}--{}: left_move {:0f},right_move {:0f}'.format( img_index, im_name, left_move, right_move)) print('index-{}--{}: height_difference {:0f}'.format( img_index, im_name, hd)) final_result.append((left_move, right_move)) return final_result, vis_images, now_time, height_difference elif len(img_names) == 1: now_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()) print('========START-One========') final_result = [] vis_images = [] height_difference = [] for img_index in range(len(img_names)): img_name = img_names[img_index] try: img, orig_img, im_name, im_dim_list = [], [], [], [] inp_dim = int(self.args.inp_dim) im_name_k = img_name img_k, orig_img_k, im_dim_list_k = prep_image( im_name_k, inp_dim) img.append(img_k) orig_img.append(orig_img_k) im_name.append(im_name_k) im_dim_list.append(im_dim_list_k) except: print('index-{}: image have problem'.format(img_index)) final_result.append((None, None)) with torch.no_grad(): img = torch.cat(img) vis_img = img.numpy()[0] vis_img = np.transpose(vis_img, (1, 2, 0)) vis_img = vis_img[:, :, ::-1] vis_images.append(vis_img) im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) img = img.cuda() prediction = self.det_model(img, CUDA=True) dets = dynamic_write_results(prediction, self.args.confidence, self.args.num_classes, nms=True, nms_conf=self.args.nms_thesh) if isinstance(dets, int) or dets.shape[0] == 0: print('index-{}: No person detected'.format(img_index)) final_result.append((None, None)) else: print('index-{}: Person detected'.format(img_index)) final_result.append((4, 4)) return final_result, vis_images, now_time, height_difference
def update(self):
    # keep looping the whole dataset
    for i in range(self.num_batches):
        img, orig_img, im_name, im_dim_list = self.dataloder.Q[i]
        with torch.no_grad():
            # Human Detection
            if opt.device == 'GPU':
                img = img.cuda()
            else:
                img = img.cpu()
            prediction = self.det_model(img, CUDA=True if opt.device == 'GPU' else False)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                for k in range(len(orig_img)):
                    self.Q.append((orig_img[k], im_name[k], None, None, None, None, None))
                continue
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor

            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    self.Q.append((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                # multiply the score with bounding box height
                processed_scores = self.cal_scores(scores, boxes_k)
                best = np.argmax(processed_scores)
                self.Q.append((orig_img[k], im_name[k],
                               boxes_k[best:best + 1],
                               scores[best],
                               inps[best:best + 1],
                               pt1[best:best + 1],
                               pt2[best:best + 1]))
def update(self): # keep looping the whole video for i in range(self.num_batches): img = [] inp = [] orig_img = [] im_name = [] im_dim_list = [] for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)): (grabbed, frame) = self.stream.read() # if the `grabbed` boolean is `False`, then we have # reached the end of the video file if not grabbed: self.stop() return # process and add the frame to the queue inp_dim = int(opt.inp_dim) img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim) inp_k = im_to_torch(orig_img_k) img.append(img_k) inp.append(inp_k) orig_img.append(orig_img_k) im_dim_list.append(im_dim_list_k) with torch.no_grad(): ht = inp[0].size(1) wd = inp[0].size(2) # Human Detection img = Variable(torch.cat(img)).cuda() im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) im_dim_list = im_dim_list.cuda() prediction = self.det_model(img, CUDA=True) # NMS process dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True, nms_conf=opt.nms_thesh) if isinstance(dets, int) or dets.shape[0] == 0: for k in range(len(inp)): while self.Q.full(): time.sleep(0.2) self.Q.put((inp[k], orig_img[k], None, None)) continue im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 dets[:, 1:5] /= scaling_factor for j in range(dets.shape[0]): dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0]) dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) boxes = dets[:, 1:5].cpu() scores = dets[:, 5:6].cpu() for k in range(len(inp)): while self.Q.full(): time.sleep(0.2) self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))
(img, orig_img, inp, im_dim_list) = fvs.read()
ckpt_time, load_time = getTime(start_time)
runtime_profile['ld'].append(load_time)

with torch.no_grad():
    # Human Detection
    img = Variable(img).cuda()
    im_dim_list = im_dim_list.cuda()
    prediction = det_model(img, CUDA=True)
    ckpt_time, det_time = getTime(ckpt_time)
    runtime_profile['dt'].append(det_time)
    # NMS process
    dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                 nms=True, nms_conf=opt.nms_thesh)
    if isinstance(dets, int) or dets.shape[0] == 0:
        writer.save(None, None, None, None, None, orig_img, im_name=str(i) + '.jpg')
        continue
    im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
    scaling_factor = torch.min(det_inp_dim / im_dim_list, 1)[0].view(-1, 1)
def update(self):
    while True:
        (img, orig_img, im_name, im_dim_list) = self.dataloder.getitem()
        with self.dataloder.Q.mutex:
            self.dataloder.Q.queue.clear()
        with torch.no_grad():
            # Human Detection
            img = img.cuda()
            prediction = self.det_model(img, CUDA=True)
            # im_dim_list = im_dim_list.cuda()
            frame_id = int(im_name.split('.')[0])
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img, frame_id, None, None, None, None, None))
                continue
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor

            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

            # Pose Estimation
            inp = im_to_torch(orig_img)
            inps = torch.zeros(boxes.size(0), 3, opt.inputResH, opt.inputResW)
            pt1 = torch.zeros(boxes.size(0), 2)
            pt2 = torch.zeros(boxes.size(0), 2)
            inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
            inps = Variable(inps.cuda())
            hm = self.pose_model(inps)

            if boxes is None:
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img, frame_id, None, None, None, None, None))
                continue
            else:
                preds_hm, preds_img, preds_scores = getPrediction(
                    hm.cpu(), pt1, pt2, opt.inputResH, opt.inputResW,
                    opt.outputResH, opt.outputResW)
                bbox, b_score, kp, kp_score, roi = pose_nms(
                    orig_img, boxes, scores, preds_img, preds_scores)
                # result = {'imgname': im_name, 'result': result, 'orig_img': orig_img}
                if self.Q.full():
                    time.sleep(2)
                # self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2))
                # self.Q.put((result, orig_img, im_name))
                self.Q.put((orig_img, frame_id, bbox, b_score, kp, kp_score, roi))
def forward(self, Q_load, Q_det): # keep looping the whole dataset while True: #print(Q_load.qsize(), Q_det.qsize()) img, orig_img, im_dim_list = Q_load.get() with torch.no_grad(): # Human Detection img = img.cuda() prediction = self.det_model(img, CUDA=True) # NMS process dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True, nms_conf=opt.nms_thesh) if isinstance(dets, int) or dets.shape[0] == 0: for k in range(len(orig_img)): if Q_det.full(): time.sleep(0.1) #print("detectionloaderQ1 full ") #Q_det.put((orig_img[k], None, None, None, None, None)) Q_det.put((None, orig_img[k], None, None, None, None)) continue dets = dets.cpu() im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 dets[:, 1:5] /= scaling_factor for j in range(dets.shape[0]): dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0]) dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) boxes = dets[:, 1:5] scores = dets[:, 5:6] for k in range(len(orig_img)): boxes_k = boxes[dets[:, 0] == k] inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW) pt1 = torch.zeros(boxes_k.size(0), 2) pt2 = torch.zeros(boxes_k.size(0), 2) inp = im_to_torch(cv2.cvtColor(orig_img[k], cv2.COLOR_BGR2RGB)) inps, pt1, pt2 = crop_from_dets(inp, boxes_k, inps, pt1, pt2) if Q_det.full(): time.sleep(0.1) #print("detectionloaderQ3 full ") #Q_det.put((orig_img[k], boxes_k, scores[dets[:,0]==k], inps, pt1, pt2)) Q_det.put((inps, orig_img[k], boxes_k, scores[dets[:, 0] == k], pt1, pt2))
def update(self): # keep looping the whole dataset for i in range(self.num_batches): img = [] inp = [] orig_img = [] im_name = [] im_dim_list = [] for k in range(i * self.batchSize, min((i + 1) * self.batchSize, self.datalen)): img_k, inp_k, orig_img_k, im_name_k, im_dim_list_k = self.dataset.__getitem__( k) img.append(img_k) inp.append(inp_k) orig_img.append(orig_img_k) im_name.append(im_name_k) im_dim_list.append(im_dim_list_k) with torch.no_grad(): ht = inp[0].size(1) wd = inp[0].size(2) # Human Detection if self.cuda_id is None: img = Variable(torch.cat(img)).cuda() else: img = Variable(torch.cat(img)).cuda(self.cuda_id) im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) if self.cuda_id is None: im_dim_list = im_dim_list.cuda() else: im_dim_list = im_dim_list.cuda(self.cuda_id) prediction = self.det_model(img, CUDA=True) # NMS process dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True, nms_conf=opt.nms_thesh, cuda_id=self.cuda_id) if isinstance(dets, int) or dets.shape[0] == 0: for k in range(len(inp)): while self.Q.full(): time.sleep(0.2) self.Q.put( (inp[k], orig_img[k], im_name[k], None, None)) continue im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 dets[:, 1:5] /= scaling_factor for j in range(dets.shape[0]): dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0]) dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) boxes = dets[:, 1:5].cpu() scores = dets[:, 5:6].cpu() for k in range(len(inp)): while self.Q.full(): time.sleep(0.2) self.Q.put((inp[k], orig_img[k], im_name[k], boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))
def detect_main(im_name, orig_img, det_model, pose_model, opt): args = opt mode = args.mode inp_dim = int(opt.inp_dim) dim = orig_img.shape[1], orig_img.shape[0] img_ = (letterbox_image(orig_img, (inp_dim, inp_dim))) img_ = img_[:, :, ::-1].transpose((2, 0, 1)).copy() img = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) img = [img] orig_img = [orig_img] im_name = [im_name] im_dim_list = [dim] # img.append(img_k) # orig_img.append(orig_img_k) # im_name.append(im_name_k) # im_dim_list.append(im_dim_list_k) with torch.no_grad(): # Human Detection img = torch.cat(img) im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) # im_dim_list_ = im_dim_list # DetectionLoader det_inp_dim = int(det_model.net_info['height']) assert det_inp_dim % 32 == 0 assert det_inp_dim > 32 # res_n = 0 with torch.no_grad(): img = img.cuda() prediction = det_model(img, CUDA=True) # a tensor boxes_chair = get_box(prediction, det_inp_dim, im_dim_list, opt.confidence, opt.num_classes, 56) boxes_sofa = get_box(prediction, det_inp_dim, im_dim_list, opt.confidence, opt.num_classes, 57) boxes_bed = get_box(prediction, det_inp_dim, im_dim_list, opt.confidence, opt.num_classes, 59) dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, 0, nms=True, nms_conf=opt.nms_thesh) if isinstance(dets, int) or dets.shape[0] == 0: # cv2.imwrite('err_result/no_person/'+im_name[0][0:-4]+'_re.jpg', orig_img[0]) return [] dets = dets.cpu() im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer dets[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 dets[:, 1:5] /= scaling_factor for j in range(dets.shape[0]): dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0]) dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) boxes = dets[:, 1:5] scores = dets[:, 5:6] boxes_k = boxes[dets[:, 0] == 0] if isinstance(boxes_k, int) or boxes_k.shape[0] == 0: boxes = None scores = None inps = None pt1 = None pt2 = None else: inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW) pt1 = torch.zeros(boxes_k.size(0), 2) pt2 = torch.zeros(boxes_k.size(0), 2) orig_img = orig_img[0] im_name = im_name[0] boxes = boxes_k scores = scores[dets[:, 0] == 0] # orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2 # DetectionProcess with torch.no_grad(): if boxes is None or boxes.nelement() == 0: pass else: inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)) inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2) # self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2)) batchSize = args.posebatch # fall_res_all = [] for i in range(1): with torch.no_grad(): if boxes is None or boxes.nelement() == 0: # writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) # res_n = 0 continue # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda() hm_j = pose_model(inps_j) hm.append(hm_j) hm = torch.cat(hm) hm = hm.cpu() # writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1]) fall_res = [] keypoint_res = [] # fall_res.append(im_name.split('/')[-1]) if opt.matching: preds = getMultiPeakPrediction(hm, pt1.numpy(), pt2.numpy(), 
opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW) result = matching(boxes, scores.numpy(), preds) else: preds_hm, preds_img, preds_scores = getPrediction( hm, pt1, pt2, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW) result = pose_nms(boxes, scores, preds_img, preds_scores) result = {'imgname': im_name, 'result': result} # img = orig_img img = vis_frame(orig_img, result) for human in result['result']: keypoint = human['keypoints'] kp_scores = human['kp_score'] keypoint = keypoint.numpy() xmax = max(keypoint[:, 0]) xmin = min(keypoint[:, 0]) ymax = max(keypoint[:, 1]) ymin = min(keypoint[:, 1]) box_hm = [xmin, ymin, xmax, ymax] kp_num = 0 for i in range(len(kp_scores)): if kp_scores[i] > 0.05: kp_num += 1 if kp_num < 10: # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2) fall_res.append([False, xmin, ymin, xmax, ymax]) # print("kp_num:"+str(kp_num)) continue overlap = [] for box in boxes_chair: overlap.append(compute_overlap(box_hm, box)) for box in boxes_sofa: overlap.append(compute_overlap(box_hm, box)) for box in boxes_bed: overlap.append(compute_overlap(box_hm, box)) if len(overlap) > 0 and max(overlap) >= 0.6: # res_n = 0 fall_res.append([False, xmin, ymin, xmax, ymax]) # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2) # print("overlap:"+str(overlap)) continue w = xmax - xmin h = ymax - ymin ratio = w / h # distance = abs((keypoint[15][1] + keypoint[16][1]) / 2 - (keypoint[11][1] + keypoint[12][1]) / 2) xhead = (keypoint[1][0] + keypoint[2][0] + keypoint[2][0] + keypoint[3][0] + keypoint[4][0]) / 4 yhead = (keypoint[1][1] + keypoint[2][1] + keypoint[2][1] + keypoint[3][1] + keypoint[4][1]) / 4 xfeet = (keypoint[15][0] + keypoint[16][0]) / 2 yfeet = (keypoint[15][1] + keypoint[16][1]) / 2 d_ear = (abs(keypoint[3][0] - keypoint[4][0])**2 + abs(keypoint[3][1] - keypoint[4][1])**2)**0.5 r = (w**2 + h**2)**0.5 / d_ear if kp_scores[3] > 0.05 and kp_scores[4] > 0.05 and r < 4: fall_res.append([False, xmin, ymin, xmax, ymax]) # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2) # print("r<4") continue # distance = abs((keypoint[15][1] + keypoint[16][1]) / 2 - (keypoint[11][1] + keypoint[12][1]) / 2) # xhead_foot = abs(xfeet - xhead) # yhead_foot = abs(yfeet - yhead) # dhead_foot = (xhead_foot ** 2 + yhead_foot ** 2) ** 0.5 # ratio = yhead_foot / dhead_foot if min(kp_scores[3], kp_scores[4], kp_scores[15], kp_scores[16]) > 0.05 and yfeet < (keypoint[3][1] + keypoint[4][1]) / 2: # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2) # font = cv2.FONT_HERSHEY_SIMPLEX # cv2.putText(img, 'Warning!Fall!', (int(xmin + 10), int(ymax - 10)), font, 1, (0, 255, 0), 2) fall_res.append([True, xmin, ymin, xmax, ymax]) keypoint_res.append(keypoint) # res_n = 2 elif w / h >= 1.0: # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2) # font = cv2.FONT_HERSHEY_SIMPLEX # cv2.putText(img, 'Warning!Fall', (int(xmin + 10), int(ymax - 10)), font, 1, (0, 0, 255), 2) fall_res.append([True, xmin, ymin, xmax, ymax]) keypoint_res.append(keypoint) # res_n = 1 else: # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2) # print("normal") fall_res.append([False, xmin, ymin, xmax, ymax]) # res_n = 0 # cv2.imwrite(os.path.join(opt.outputpath, 'vis', im_name), img) ''' for box in boxes_chair: cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 255, 0), 2) for box in boxes_sofa: cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 255), 2) for box in boxes_bed: 
cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 255), 2) cv2.imwrite('err_result/false/'+im_name[0:-4]+'_re.jpg', img) ''' return keypoint_res
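# The fall rule buried in detect_main above can be read in isolation: a pose is flagged as a
# fall when the feet sit above ear level, or when its bounding box is wider than tall, unless
# the person overlaps furniture or the box-diagonal / ear-distance ratio suggests the person
# is too close to the camera. A condensed sketch of that decision, for illustration only;
# keypoint indices follow the COCO-style order listed above, and the function name and
# default thresholds are assumptions mirroring the values used in detect_main:
def looks_like_fall(keypoint, kp_scores, box_hm, furniture_overlaps,
                    kp_conf=0.05, overlap_thresh=0.6, close_ratio=4.0):
    xmin, ymin, xmax, ymax = box_hm
    w, h = xmax - xmin, ymax - ymin
    # too few confident keypoints: not enough evidence to call a fall
    if sum(1 for s in kp_scores if s > kp_conf) < 10:
        return False
    # person overlapping a chair / sofa / bed: sitting or lying, not a fall
    if furniture_overlaps and max(furniture_overlaps) >= overlap_thresh:
        return False
    # person very close to the camera (box large relative to ear spacing): skip
    d_ear = ((keypoint[3][0] - keypoint[4][0]) ** 2 + (keypoint[3][1] - keypoint[4][1]) ** 2) ** 0.5
    if kp_scores[3] > kp_conf and kp_scores[4] > kp_conf and (w ** 2 + h ** 2) ** 0.5 / d_ear < close_ratio:
        return False
    # feet above ear level, or box wider than tall => treat as a fall
    yfeet = (keypoint[15][1] + keypoint[16][1]) / 2
    years = (keypoint[3][1] + keypoint[4][1]) / 2
    feet_above_head = (min(kp_scores[3], kp_scores[4], kp_scores[15], kp_scores[16]) > kp_conf
                       and yfeet < years)
    return feet_above_head or (w / h >= 1.0)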
def update(self): print( f'WebcamDetectionLoader_update_thread: {threading.currentThread().name}' ) # keep looping while True: img = [] inp = [] orig_img = [] im_name = [] im_dim_list = [] for k in range(self.batchSize): (grabbed, frame) = self.stream.read() h, w, c = frame.shape # frame = cv2.resize(frame, (int(w / 4), int(h / 4)), interpolation=cv2.INTER_CUBIC) if not grabbed: continue # process and add the frame to the queue inp_dim = int(opt.inp_dim) img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim) inp_k = im_to_torch(orig_img_k) img.append(img_k) inp.append(inp_k) orig_img.append(orig_img_k) im_dim_list.append(im_dim_list_k) with torch.no_grad(): ht = inp[0].size(1) wd = inp[0].size(2) # Human Detection img = Variable(torch.cat(img)).cuda() im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) im_dim_list = im_dim_list.cuda() prediction = self.det_model(img, CUDA=True) # NMS process dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True, nms_conf=opt.nms_thesh) if isinstance(dets, int) or dets.shape[0] == 0: for k in range(len(inp)): if self.Q.full(): with self.Q.mutex: self.Q.queue.clear() self.Q.put((inp[k], orig_img[k], None, None)) continue im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 dets[:, 1:5] /= scaling_factor for j in range(dets.shape[0]): dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0]) dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) boxes = dets[:, 1:5].cpu() scores = dets[:, 5:6].cpu() for k in range(len(inp)): if self.Q.full(): with self.Q.mutex: self.Q.queue.clear() self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))
def update(self): # keep looping the whole dataset from mtcnn.mtcnn import MTCNN detector = MTCNN() for i in range(self.num_batches): img, orig_img, im_name, im_dim_list = self.dataloder.getitem() if img is None: self.Q.put((None, None, None, None, None, None, None)) return with torch.no_grad(): if self.dataloder.format == 'yolo': # Human Detection img = img.cuda() prediction = self.det_model(img, CUDA=True) # NMS process dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True, nms_conf=opt.nms_thesh) elif self.dataloder.format == 'mtcnn': # Face detection imgs_np = img.float().mul(255.0).cpu().numpy() imgs_np = np.squeeze(imgs_np, axis=0) imgs_np = np.transpose(imgs_np, (1, 2, 0)) dets = detector.detect_faces(imgs_np) fac_det = [] for det in dets: fac_det.append([ 0, det["box"][0], det["box"][1], det["box"][0] + det["box"][2], det["box"][1] + det["box"][3], det["confidence"], 0.99, 0 ]) dets = torch.tensor(fac_det) if isinstance(dets, int) or dets.shape[0] == 0: for k in range(len(orig_img)): if self.Q.full(): time.sleep(2) self.Q.put((orig_img[k], im_name[k], None, None, None, None, None)) continue dets = dets.cpu() im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 dets[:, 1:5] /= scaling_factor for j in range(dets.shape[0]): dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0]) dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) boxes = dets[:, 1:5] scores = dets[:, 5:6] for k in range(len(orig_img)): boxes_k = boxes[dets[:, 0] == k] if isinstance(boxes_k, int) or boxes_k.shape[0] == 0: if self.Q.full(): time.sleep(2) self.Q.put((orig_img[k], im_name[k], None, None, None, None, None)) continue inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW) pt1 = torch.zeros(boxes_k.size(0), 2) pt2 = torch.zeros(boxes_k.size(0), 2) if self.Q.full(): time.sleep(2) self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2))
def update(self):
    # keep looping the whole dataset
    for i in range(self.num_batches):
        img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
        if img is None:
            self.Q.put((None, None, None, None, None, None, None))
            return
        with torch.no_grad():
            img = img.cuda()
            # Critical, use yolo to do object detection here!
            prediction = self.det_model(img)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                for k in range(len(orig_img)):
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                continue
            dets = dets.cpu()

            # Scale for SIXD dataset
            reso = self.det_inp_dim
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            w, h = im_dim_list[:, 0], im_dim_list[:, 1]
            w_ratio = w / reso
            h_ratio = h / reso
            boxes = dets[:, 1:5]
            boxes[:, 0] = boxes[:, 0] * w_ratio
            boxes[:, 1] = boxes[:, 1] * h_ratio
            boxes[:, 2] = boxes[:, 2] * w_ratio
            boxes[:, 3] = boxes[:, 3] * h_ratio
            scores = dets[:, 5:6]

            # im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            # scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)
            # # coordinate transfer
            # dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            # dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            # dets[:, 1:5] /= scaling_factor
            # for j in range(dets.shape[0]):
            #     dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            #     dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            # boxes = dets[:, 1:5]
            # scores = dets[:, 5:6]

            img = Image.open(im_name[0])
            draw = ImageDraw.Draw(img)
            # use a separate loop variable so the batch index i is not shadowed, and format
            # the confidence of the current box rather than the whole score tensor
            for b in range(boxes.shape[0]):
                x1, y1, x2, y2 = boxes[b, 0], boxes[b, 1], boxes[b, 2], boxes[b, 3]
                objectness = 'conf: %.2f' % float(scores[b])
                draw.rectangle((x1, y1, x2, y2), outline='red')
            # img.save(im_name[0].replace('rgb', 'results'))

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k],
                            inps, pt1, pt2))
def update(self): # keep looping the whole dataset """ :return: """ for i in range(self.num_batches): # repeat img, orig_img, im_name, im_dim_list = self.dataloder.getitem() # img = (batch, frames) if img is None: self.Q.put((None, None, None, None, None, None, None)) return start_time = getTime() with torch.no_grad(): # Human Detection img = img.cuda() # image ( B, 3, 608,608 ) prediction = self.det_model(img, CUDA=True) # ( B, 22743, 85 ) = ( batchsize, proposal boxes, xywh+cls) # predictions for each B image. # NMS process carperson = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True, nms_conf=opt.nms_thesh) if isinstance(carperson, int) or carperson.shape[0] == 0: for k in range(len(orig_img)): if self.Q.full(): time.sleep(0.5) self.Q.put((orig_img[k], im_name[k], None, None, None, None, None, None)) # 8 elements continue ckpt_time, det_time = getTime(start_time) carperson = carperson.cpu() # (1) k-th image , (7) x,y,w,h,c, cls_score, cls_index im_dim_list = torch.index_select(im_dim_list, 0, carperson[:, 0].long()) scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer carperson[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 carperson[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 carperson[:, 1:5] /= scaling_factor for j in range(carperson.shape[0]): carperson[j, [1, 3]] = torch.clamp(carperson[j, [1, 3]], 0.0, im_dim_list[j, 0]) carperson[j, [2, 4]] = torch.clamp(carperson[j, [2, 4]], 0.0, im_dim_list[j, 1]) cls_car_mask = carperson * (carperson[:, -1] == 2).float().unsqueeze(1) # car class__car_mask_ind = torch.nonzero(cls_car_mask[:, -2]).squeeze() car_dets = carperson[class__car_mask_ind].view(-1, 8) cls_person_mask = carperson * (carperson[:, -1] == 0).float().unsqueeze(1) # person class__person_mask_ind = torch.nonzero(cls_person_mask[:, -2]).squeeze() hm_dets = carperson[class__person_mask_ind].view(-1, 8) ckpt_time, masking_time = getTime(ckpt_time) hm_boxes, hm_scores = None, None if hm_dets.size(0) > 0: hm_boxes = hm_dets[:, 1:5] hm_scores = hm_dets[:, 5:6] car_box_conf = None if car_dets.size(0) > 0: car_box_conf = car_dets for k in range(len(orig_img)): # for k-th image detection. if car_box_conf is None: car_k = None else: car_k = car_box_conf[car_box_conf[:, 0] == k].numpy() car_k = car_k[np.where(car_k[:, 5] > 0.2)] # TODO check here, cls or bg/fg confidence? # car_k = non_max_suppression_fast(car_k, overlapThresh=0.3) # TODO check here, NMS if hm_boxes is not None: hm_boxes_k = hm_boxes[hm_dets[:, 0] == k] hm_scores_k = hm_scores[hm_dets[:, 0] == k] inps = torch.zeros(hm_boxes_k.size(0), 3, opt.inputResH, opt.inputResW) pt1 = torch.zeros(hm_boxes_k.size(0), 2) pt2 = torch.zeros(hm_boxes_k.size(0), 2) item = (orig_img[k], im_name[k], hm_boxes_k, hm_scores_k, inps, pt1, pt2, car_k) # print('video processor ', 'image' , im_name[k] , 'hm box ' , hm_boxes_k.size()) else: item = (orig_img[k], im_name[k], None, None, None, None, None, car_k) # 8-elemetns if self.Q.full(): time.sleep(0.5) self.Q.put(item) ckpt_time, distribute_time = getTime(ckpt_time)
def update(self):
    for i in range(self.num_batches):
        img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
        if img is None:
            self.Q.put((None, None, None, None, None, None, None))
            return
        # torch.no_grad() disables gradient tracking for tensors that do not need gradients
        with torch.no_grad():
            # Human Detection
            img = img.cuda()
            prediction = self.det_model(img, CUDA=True)
            dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                         nms=True, nms_conf=opt.nms_thesh)
            # multiple persons
            if isinstance(dets, int) or dets.shape[0] == 0:
                for k in range(len(orig_img)):
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                continue
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer; the y offsets use the image height (column 1 of im_dim_list)
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor

            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            print("boxes", boxes)
            scores = dets[:, 5:6]
            print("scores", scores)

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:, 0] == k],
                            inps, pt1, pt2))
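# Nearly every update() above guards self.Q.put() with "if self.Q.full(): time.sleep(2)",
# which only pauses once and then relies on put() blocking if the queue fills up again.
# A small helper that captures the intended back-off in one place; the function name and
# wait time are assumptions, not part of the original loaders:
import time

def put_with_backoff(queue, item, wait=2.0):
    """Sleep while the consumer catches up, then enqueue the item."""
    while queue.full():
        time.sleep(wait)
    queue.put(item)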