def update(self):
    # keep looping over the whole dataset
    for i in range(self.datalen):
        with torch.no_grad():
            (orig_img, im_name, boxes, scores, inps, pt1, pt2, CAR) = self.detectionLoader.read()
            # print('detection processor', im_name, boxes)
            if orig_img is None:
                self.Q.put((None, None, None, None, None, None, None, None))
                return
            # if boxes is None or boxes.nelement() == 0:
            if boxes is None:
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((None, orig_img, im_name, boxes, scores, None, None, CAR))
                continue

            start_time = getTime()
            inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
            # print(boxes, pt1, pt2)
            ckpt_time, torch_time = getTime(start_time)
            # print('torch time', round(torch_time, 3))

            while self.Q.full():
                time.sleep(0.2)
            self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2, CAR))
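# The producer above applies backpressure by spinning on Q.full() with a short
# sleep before each put, and signals end-of-stream with a tuple of Nones. A
# minimal, self-contained sketch of the same bounded-queue pattern (names here
# are illustrative, not from the repo):
import queue
import threading
import time

def producer(q, items):
    for item in items:
        while q.full():      # wait for the consumer to drain the queue
            time.sleep(0.2)
        q.put(item)
    q.put(None)              # sentinel, mirroring the tuple of Nones above

def consumer(q):
    while True:
        item = q.get()
        if item is None:     # stop on the sentinel
            break
        print('processed', item)

q = queue.Queue(maxsize=4)
threading.Thread(target=producer, args=(q, range(10))).start()
consumer(q)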
def forward(self, Q_det):
    clock = 0
    while True:
        with torch.no_grad():
            start_time = getTime()
            (inps, orig_img, boxes, scores, pt1, pt2) = Q_det.get()
            ckpt_time, det_time = getTime(start_time)
            img = orig_img
            if boxes is not None:
                hm_data = self.pose_model(inps.cuda())
                ckpt_time, pose_time = getTime(ckpt_time)
                hm_data = hm_data.cpu()
                preds_hm, preds_img, preds_scores = getPrediction(
                    hm_data, pt1, pt2, opt.inputResH, opt.inputResW,
                    opt.outputResH, opt.outputResW)
                result = pose_nms(boxes, scores, preds_img, preds_scores)
                result = {'result': result}
                img, datas, valid = vis_frame(img, result, opt.webcam_num)
                ckpt_time, post_time = getTime(ckpt_time)
                # print("det_time={:.3f}, pose_time={:.3f}, post_time={:.3f}, fps={:.1f}".format(
                #     det_time, pose_time, post_time, 1 / (det_time + pose_time + post_time)))
                if valid:
                    for data in datas:
                        clock = 0
                        self.mq.sendMsg(data)
                        print(data)
                else:
                    clock = clock + 1
                    if clock > 500:
                        self.mq.sendMsg(
                            '{{ "source": "Behavior", "deviceId": {}, "hand": "{},{}", "body": {}, "state": {} }}'
                            .format(0, 0, 0, 0, 0))
                        clock = 0
            else:
                time.sleep(0.05)
                clock = clock + 1
                if clock > 500:
                    self.mq.sendMsg(
                        '{{ "source": "Behavior", "deviceId": {}, "hand": "{},{}", "body": {}, "state": {} }}'
                        .format(0, 0, 0, 0, 0))
                    clock = 0
            '''
            self.mq.sendMsg('{ "source": "Behavior", "deviceId": 0, "hand": "0,0", "body": 0 }')
            print('{ "source": "Behavior", "deviceId": 0, "hand": "0,0", "body": 0 }')
            '''
            if opt.vis:
                cv2.imshow("AlphaPose Demo", img)
                cv2.waitKey(10)
            temp = cv2.imencode(".jpg", img)[1]
            icode = np.array(temp)
            se = icode.tostring()
            n = len(se)
            self.dll.mjpg_write(c_char_p(se), n)
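# The doubled braces in the heartbeat template above escape literal JSON
# braces for str.format. A quick standalone check that the emitted message is
# valid JSON (illustrative):
import json

msg = ('{{ "source": "Behavior", "deviceId": {}, "hand": "{},{}", '
       '"body": {}, "state": {} }}').format(0, 0, 0, 0, 0)
print(msg)       # { "source": "Behavior", "deviceId": 0, "hand": "0,0", "body": 0, "state": 0 }
json.loads(msg)  # parses cleanly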
def update(self):
    while True:
        # if the thread indicator variable is set, stop the thread
        if self.stopped:
            if self.save_video:
                print('released video stream')
                self.stream.release()
            return
        # otherwise, ensure the queue is not empty
        if not self.Q.empty():
            start_time = getTime()
            (boxes, scores, hm_data, pt1, pt2, orig_img, img_id, car_np) = self.Q.get()
            orig_img = np.array(orig_img, dtype=np.uint8)
            img = orig_img
            # print('processing', img_id)

            """ PERSON """
            person_list = self.person.person_tracking(boxes, scores, hm_data, pt1, pt2, img_id)
            self.person.person_tracjectory(person_list)
            vis_frame(img, person_list)

            """ Car """
            if opt.park:
                car_dest_list = self.vehicle.car_tracking(car_np, img_id)
                self.vehicle.car_trajectory(car_dest_list)
                self.vehicle.parking_detection(car_dest_list, img, img_id)
            elif opt.fight:
                self.person.fight_detection(person_list, img_id)

            # the point argument is the left-bottom corner of the text
            cv2.putText(img, f'Frame: {str(img_id).ljust(4)}', (10, 40),
                        cv2.FONT_HERSHEY_DUPLEX, 2, WHITE, 2)
            # self.fight_detection(person_list)
            # self.car_person_detection(car_dest_list, bbox_dets_list, img)
            ckpt_time, det_time = getTime(start_time)

            if opt.vis:
                cv2.imshow("AlphaPose Demo", img)
                cv2.waitKey(33)
            if opt.save_video:
                self.stream.write(img)
        else:
            time.sleep(0.1)
def update(self):
    # keep looping over the whole dataset
    car_list_list, hm_list_list = get_bbox_list(self.txtpath)
    for i in range(self.num_batches):  # repeat
        img, orig_img, im_name, im_dim_list = self.dataloder.getitem()  # img = (batch, frames)
        if img is None:
            self.Q.put((None, None, None, None, None, None, None))
            return

        start_time = getTime()
        # with torch.no_grad():
        #     # Human Detection
        #     img = img.cuda()  # image (B, 3, 608, 608)
        for k in range(len(orig_img)):  # for the k-th image detection
            im_name_k = im_name[k]
            car_list = car_list_list[im_name_k]
            hm_list = hm_list_list[im_name_k]

            if len(car_list) == 0:  # no cars
                car_list_np = None
            else:
                car_list_np = np.array(car_list)

            if len(hm_list):  # humans not empty
                # bbox [idx, cls, x, y, w, h, c]
                hm_list_np = np.array(hm_list)
                hm_boxes_k = hm_list_np[:, 0:4]
                hm_scores_k = hm_list_np[:, 4]
                size = hm_boxes_k.shape[0]
                inps = torch.zeros(size, 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(size, 2)
                pt2 = torch.zeros(size, 2)
                item = (orig_img[k], im_name[k], hm_boxes_k, hm_scores_k, inps, pt1, pt2, car_list_np)
            else:
                item = (orig_img[k], im_name[k], None, None, None, None, None, car_list_np)  # 8 elements

            if self.Q.full():
                time.sleep(0.3)
            self.Q.put(item)
pose_dataset = Mscoco()
if args.fast_inference:
    pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
else:
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
pose_model.cuda()
pose_model.eval()

runtime_profile = {'ld': [], 'dt': [], 'dn': [], 'pt': [], 'pn': []}

print('Starting webcam demo, press Ctrl + C to terminate...')
sys.stdout.flush()
im_names_desc = tqdm(loop())
for i in im_names_desc:
    try:
        start_time = getTime()
        (img, orig_img, inp, im_dim_list) = fvs.read()
        ckpt_time, load_time = getTime(start_time)
        runtime_profile['ld'].append(load_time)
        with torch.no_grad():
            # Human Detection
            img = Variable(img).cuda()
            im_dim_list = im_dim_list.cuda()
            prediction = det_model(img, CUDA=True)
            ckpt_time, det_time = getTime(ckpt_time)
            runtime_profile['dt'].append(det_time)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence,
                                         opt.num_classes, nms=True,
                                         nms_conf=opt.nms_thesh)
else:
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
pose_model.cuda()
pose_model.eval()

runtime_profile = {
    'ld': [], 'dt': [], 'dn': [], 'pt': [], 'pn': []
}

im_names_desc = tqdm(range(test_loader.length()))
for i in im_names_desc:
    start_time = getTime()
    with torch.no_grad():
        # Human Detection
        (inp, orig_img, boxes, scores) = test_loader.read()
        if boxes is None or boxes.nelement() == 0:
            writer.save(None, None, None, None, None, orig_img, im_name=str(i) + '.jpg')
            continue
        print("test loader:", test_loader.len())
        ckpt_time, det_time = getTime(start_time)
        runtime_profile['dt'].append(det_time)
        # Pose Estimation
        inps, pt1, pt2 = crop_from_dets(inp, boxes)
        inps = Variable(inps.cuda())
        hm = pose_model(inps)
def handle_video(videofile):
    args.video = videofile
    videofile = args.video
    mode = args.mode
    if not len(videofile):
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()
    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    # start a thread to read frames from the file video stream
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:  # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print('===========================> Rendering remaining images in the queue...')
        print('===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).')
    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()

    # get the person from box 0
    kpts = []
    for i in range(len(final_result)):
        try:
            preds = final_result[i]['result']
            # preds[i]['keypoints']       (17, 2)
            # preds[i]['kp_score']        (17, 1)
            # preds[i]['proposal_score']  (1)
            # pick the person with the largest y coordinate -- used for badminton videos
            max_index = 0
            min_index = 0
            # max_y = np.mean(preds[0]['keypoints'].data.numpy()[:, 1])
            min_x = np.mean(preds[0]['keypoints'].data.numpy()[:, 0])
            max_x = np.mean(preds[0]['keypoints'].data.numpy()[:, 0])
            for k in range(len(preds)):
                # tmp_y = np.mean(preds[k]['keypoints'].data.numpy()[:, 1])
                tmp_x = np.mean(preds[k]['keypoints'].data.numpy()[:, 0])
                # if tmp_y > max_y:
                if tmp_x < min_x:
                    min_index = k
                    # max_y = tmp_y
                    min_x = tmp_x
            for k in range(len(preds)):
                # tmp_y = np.mean(preds[k]['keypoints'].data.numpy()[:, 1])
                tmp_x = np.mean(preds[k]['keypoints'].data.numpy()[:, 0])
                # if tmp_y > max_y:
                if tmp_x > max_x:
                    max_index = k
                    max_x = tmp_x
            mid_index = 0
            for k in range(len(preds)):
                if k == max_index or k == min_index:
                    continue
                mid_index = k
            kpt = preds[mid_index]['keypoints']
            # kpt = final_result[i]['result'][0]['keypoints']
            kpts.append(kpt.data.numpy())
        except Exception:
            # print(sys.exc_info())
            print('error...')

    filename = os.path.basename(args.video).split('.')[0]
    name = filename + '.npz'
    kpts = np.array(kpts).astype(np.float32)
    # print('kpts npz save in', name)
    # np.savez_compressed(name, kpts=kpts)
    return kpts
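# Every pose loop in this section splits detections into GPU batches with the
# same ceil-division idiom (leftover = 1 if datalen % batchSize else 0). A
# minimal sketch of that slicing, checking it covers each element exactly once
# (split_batches is illustrative, not a repo helper):
import torch

def split_batches(inps, batch_size):
    datalen = inps.size(0)
    leftover = 1 if datalen % batch_size else 0
    num_batches = datalen // batch_size + leftover
    return [inps[j * batch_size:min((j + 1) * batch_size, datalen)]
            for j in range(num_batches)]

chunks = split_batches(torch.arange(10).float(), 4)
assert [c.size(0) for c in chunks] == [4, 4, 2]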
# detection module
print('Loading detection model ')
sys.stdout.flush()
det_model = inference.yolo_detecter()

# pose module
print('Loading pose model')
sys.stdout.flush()
pose_model = inference.pose_detection()

print('Starting webcam demo, press Ctrl + C to terminate...')
sys.stdout.flush()
im_names_desc = loop()  # tqdm(loop())
for i in im_names_desc:
    try:
        start_time = getTime()
        begin = time.time()

        # ################################## Get Frames ####################################
        print('\n******************* Frame:%d ********************' % i)
        img_0 = fvs_0.read()
        img_1 = fvs_1.read()
        # downscale to 1/2 for visualization
        fvis_0 = resize_vis(img_0)
        fvis_1 = resize_vis(img_1)
        # downscale to 1/4 for YOLO processing
        frame_0 = resize_yolo(img_0)
        frame_1 = resize_yolo(img_1)
        single_height = frame_0.shape[0]
        # print(frame_0.shape)  # (432, 768, 3)
def call_alphapose(input_dir, output_dir, format='open', batchSize=1):
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    for root, dirs, files in os.walk(input_dir):
        im_names = files
    print(files)

    data_loader = ImageLoader(im_names, batchSize=batchSize, format='yolo',
                              dir_path=input_dir).start()
    det_loader = DetectionLoader(data_loader, batchSize=batchSize).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(False).start()

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()
    write_json(final_result, output_dir, _format=format)
    correct_json_save(output_dir)
    print('Over')
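# A hedged usage sketch for the entry point above (paths are illustrative;
# 'open' selects the JSON keypoint format passed through to write_json):
if __name__ == '__main__':
    call_alphapose(input_dir='data/frames',
                   output_dir='outputs/poses',
                   format='open',
                   batchSize=4)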
def update(self):
    next_id = 0
    while True:
        # if the thread indicator variable is set, stop the thread
        if self.stopped:
            if self.save_video:
                self.stream.release()
            return
        # otherwise, ensure the queue is not empty
        if not self.Q.empty():
            start_time = getTime()
            (boxes, scores, hm_data, pt1, pt2, orig_img, img_id, CAR) = self.Q.get()
            # print(img_id)
            orig_img = np.array(orig_img, dtype=np.uint8)
            img = orig_img
            # text_filled2(img, (5, 200), str(img_id), LIGHT_GREEN, 2, 2)

            """ PERSON """
            if boxes is None:  # no person detections
                person_list = []
            else:
                person_list = self.person_tracking(boxes, scores, hm_data, pt1, pt2, img_id)

            # update frame
            vis_frame(img, person_list)

            """ Car """
            if CAR is not None:
                car_np = CAR
                car_dest_list = self.car_tracking(car_np, img_id)
            else:
                car_dest_list = []

            self.car_list_list.append(car_dest_list)
            self.person_list_list.append(person_list)
            self.car_trajectory(car_dest_list)
            self.person_tracjectory(person_list)

            # FOR GIST2019
            if img_id != 0:
                self.fight_detection(person_list)
                # self.parking_detection(car_dest_list, img, img_id)
                # self.car_person_detection(car_dest_list, bbox_dets_list, img)

            # FOR NEXPA
            # self.person_nexpa(bbox_dets_list, img, img_id)

            ckpt_time, det_time = getTime(start_time)
            # cv2.putText(img, str(1 / det_time), (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 1)
            cv2.putText(img, str(img_id), (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 1)

            if opt.vis:
                cv2.imshow("AlphaPose Demo", img)
                cv2.waitKey(33)
            if opt.save_video:
                self.stream.write(img)
        else:
            time.sleep(0.1)
def test():
    inputpath = args.inputpath
    inputlist = args.inputlist
    mode = args.mode

    # if not os.path.exists(args.outputpath):
    #     os.mkdir(args.outputpath)
    # if len(inputlist):
    #     im_names = open(inputlist, 'r').readlines()
    # elif len(inputpath) and inputpath != '/':
    for root, dirs, files in os.walk(inputpath):
        im_names = files
    # else:
    #     raise IOError('Error: must contain either --indir/--list')

    im_names = sorted(im_names, key=lambda x: int(os.path.splitext(x)[0]))
    print(im_names)

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=1, format='yolo').start()

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=1).start()
    det_processor = DetectionProcessor(det_loader).start()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(args.save_video).start()

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:  # TQDM
            im_names_desc.set_description(
                'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('===========================> Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print('===========================> Rendering remaining images in the queue...')
        print('===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).')
    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)
    return final_result
def handle_video(video_file):
    # =========== common ===============
    args.video = video_file
    base_name = os.path.basename(args.video)
    video_name = base_name[:base_name.rfind('.')]
    # =========== end common ===============

    # =========== image ===============
    # img_path = f'outputs/alpha_pose_{video_name}/split_image/'
    # args.inputpath = img_path
    # args.outputpath = f'outputs/alpha_pose_{video_name}'
    # if os.path.exists(args.outputpath):
    #     shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    # else:
    #     os.mkdir(args.outputpath)
    # # if not len(video_file):
    # #     raise IOError('Error: must contain --video')
    # if len(img_path) and img_path != '/':
    #     for root, dirs, files in os.walk(img_path):
    #         im_names = sorted([f for f in files if 'png' in f or 'jpg' in f])
    # else:
    #     raise IOError('Error: must contain either --indir/--list')
    # # Load input images
    # data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo').start()
    # print(f'Totally {data_loader.datalen} images')
    # =========== end image ===============

    # =========== video ===============
    args.outputpath = f'outputs/alpha_pose_{video_name}'
    if os.path.exists(args.outputpath):
        shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    else:
        os.mkdir(args.outputpath)

    videofile = args.video
    mode = args.mode
    if not len(videofile):
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()
    print('the video is {} f/s'.format(fps))
    # =========== end video ===============

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    # start a thread to read frames from the file video stream
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model  # .cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(video_file).split('.')[0] + '.avi')
    # writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()
    writer = DataWriter(args.save_video).start()

    print('Start pose estimation...')
    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                print(f'{i}-th image read None: handle_video')
                break
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)]  # .cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:  # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print('===========================> Rendering remaining images in the queue...')
        print('===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).')
    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)
    return final_result, video_name
def handle_video(videofile, no_nan=True):
    args.video = videofile
    videofile = args.video
    mode = args.mode
    if not len(videofile):
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()
    cam_w = frameSize[0]
    cam_h = frameSize[1]
    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    # start a thread to read frames from the file video stream
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    frames_w_pose = []
    frame_cnt = 0
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                break
            frame_cnt += 1
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            frames_w_pose.append(frame_cnt - 1)
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:  # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print('===========================> Rendering remaining images in the queue...')
        print('===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).')
    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()

    kpts = []
    if not no_nan:
        for i in range(frame_cnt):
            # initialize to NaN so we can interpolate later
            kpts.append(np.full((17, 2), np.nan, dtype=np.float32))
    for i in range(len(final_result)):
        try:
            kpt = final_result[i]['result'][0]['keypoints']
            if not no_nan:
                kpts[frames_w_pose[i]] = kpt.data.numpy()
            else:
                kpts.append(kpt.data.numpy())
        except Exception:
            print('error...')

    kpts = np.array(kpts).astype(np.float32)
    # filename = os.path.basename(args.video).split('.')[0]
    # name = filename + '.npz'
    # print('kpts npz save in', name)
    # np.savez_compressed(name, kpts=kpts, fps=fps, cam_w=cam_w, cam_h=cam_h)
    return kpts, fps, cam_w, cam_h
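# When called with no_nan=False, frames without a detection stay NaN so they
# can be filled in afterwards (see the "initialize to NaN" comment above). A
# hedged sketch of that post-processing step; interpolate_gaps is illustrative,
# not part of the repo:
import numpy as np

def interpolate_gaps(kpts):
    """Linearly interpolate NaN frames per keypoint coordinate.
    kpts: (num_frames, 17, 2) array with NaN rows for frames without a pose."""
    out = kpts.copy()
    t = np.arange(len(out))
    for j in range(out.shape[1]):
        for c in range(out.shape[2]):
            col = out[:, j, c]
            ok = ~np.isnan(col)
            if ok.any():
                out[:, j, c] = np.interp(t, t[ok], col[ok])
    return out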
def update(self):
    # keep looping over the whole dataset
    for i in range(self.num_batches):  # repeat
        img, orig_img, im_name, im_dim_list = self.dataloder.getitem()  # img = (batch, frames)
        if img is None:
            self.Q.put((None, None, None, None, None, None, None))
            return

        start_time = getTime()
        with torch.no_grad():
            # Human Detection
            img = img.cuda()  # image (B, 3, 608, 608)
            prediction = self.det_model(img, CUDA=True)
            # (B, 22743, 85) = (batch size, proposal boxes, xywh + cls)
            # predictions for each of the B images

            # NMS process
            carperson = dynamic_write_results(prediction, opt.confidence,
                                              opt.num_classes, nms=True,
                                              nms_conf=opt.nms_thesh)

            if isinstance(carperson, int) or carperson.shape[0] == 0:
                for k in range(len(orig_img)):
                    if self.Q.full():
                        time.sleep(0.5)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None, None))  # 8 elements
                continue

            ckpt_time, det_time = getTime(start_time)

            carperson = carperson.cpu()  # (1) k-th image, (7) x, y, w, h, c, cls_score, cls_index
            im_dim_list = torch.index_select(im_dim_list, 0, carperson[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            carperson[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            carperson[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            carperson[:, 1:5] /= scaling_factor

            for j in range(carperson.shape[0]):
                carperson[j, [1, 3]] = torch.clamp(carperson[j, [1, 3]], 0.0, im_dim_list[j, 0])
                carperson[j, [2, 4]] = torch.clamp(carperson[j, [2, 4]], 0.0, im_dim_list[j, 1])

            cls_car_mask = carperson * (carperson[:, -1] == 2).float().unsqueeze(1)  # car
            class__car_mask_ind = torch.nonzero(cls_car_mask[:, -2]).squeeze()
            car_dets = carperson[class__car_mask_ind].view(-1, 8)

            cls_person_mask = carperson * (carperson[:, -1] == 0).float().unsqueeze(1)  # person
            class__person_mask_ind = torch.nonzero(cls_person_mask[:, -2]).squeeze()
            hm_dets = carperson[class__person_mask_ind].view(-1, 8)

            ckpt_time, masking_time = getTime(ckpt_time)

            hm_boxes, hm_scores = None, None
            if hm_dets.size(0) > 0:
                hm_boxes = hm_dets[:, 1:5]
                hm_scores = hm_dets[:, 5:6]

            car_box_conf = None
            if car_dets.size(0) > 0:
                car_box_conf = car_dets

            for k in range(len(orig_img)):  # for the k-th image detection
                if car_box_conf is None:
                    car_k = None
                else:
                    car_k = car_box_conf[car_box_conf[:, 0] == k].numpy()
                    car_k = car_k[np.where(car_k[:, 5] > 0.2)]  # TODO check here: cls or bg/fg confidence?
                    # car_k = non_max_suppression_fast(car_k, overlapThresh=0.3)  # TODO check here: NMS

                if hm_boxes is not None:
                    hm_boxes_k = hm_boxes[hm_dets[:, 0] == k]
                    hm_scores_k = hm_scores[hm_dets[:, 0] == k]
                    inps = torch.zeros(hm_boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                    pt1 = torch.zeros(hm_boxes_k.size(0), 2)
                    pt2 = torch.zeros(hm_boxes_k.size(0), 2)
                    item = (orig_img[k], im_name[k], hm_boxes_k, hm_scores_k, inps, pt1, pt2, car_k)
                    # print('video processor', 'image', im_name[k], 'hm box', hm_boxes_k.size())
                else:
                    item = (orig_img[k], im_name[k], None, None, None, None, None, car_k)  # 8 elements

                if self.Q.full():
                    time.sleep(0.5)
                self.Q.put(item)

            ckpt_time, distribute_time = getTime(ckpt_time)
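# The "coordinate transfer" above undoes YOLO's letterbox resize: detections
# predicted inside the det_inp_dim square are shifted by the padding and
# divided by the scale factor to land back in original image coordinates. A
# minimal numeric sketch of the same formula (values illustrative):
import torch

det_inp_dim = 608.0
im_dim = torch.tensor([[1920.0, 1080.0]])                    # original (w, h)
scale = torch.min(det_inp_dim / im_dim, 1)[0].view(-1, 1)    # 608/1920 ~ 0.317
box = torch.tensor([[0.0, 300.0, 200.0, 500.0, 400.0]])      # (img_idx, x1, y1, x2, y2)
box[:, [1, 3]] -= (det_inp_dim - scale * im_dim[:, 0].view(-1, 1)) / 2  # x padding = 0
box[:, [2, 4]] -= (det_inp_dim - scale * im_dim[:, 1].view(-1, 1)) / 2  # y padding ~ 133
box[:, 1:5] /= scale                                         # back to 1920x1080 space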
def alphapose_process_video(videofile, pose_result_handler, inference_steps=1):
    if not len(videofile):
        raise IOError("Error: must contain --video")

    # Load input video
    print(f"Opening video {videofile}")
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()

    # Load detection loader
    print("Loading YOLO model..")
    sys.stdout.flush()
    det_loader = DetectionLoader(
        data_loader,
        batchSize=args.detbatch,
        path=ALPHAPOSE_DIR,
        inference_steps=inference_steps,
    ).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset, path=ALPHAPOSE_DIR)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset, path=ALPHAPOSE_DIR)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {"dt": [], "pt": [], "pn": []}

    # Data writer
    args.save_video = args.video_savefile is not None
    writer = DataWriter(
        save_video=args.save_video,  # Note: DataWriter uses args.save_video internally as well
        savepath=args.video_savefile,
        fourcc=cv2.VideoWriter_fourcc(*"XVID"),
        fps=fps,
        frameSize=frameSize,
        result_handler=pose_result_handler,
    ).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split("/")[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile["dt"].append(det_time)

            # Pose Estimation
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile["pt"].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split("/")[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile["pn"].append(post_time)

        if args.profile:  # TQDM
            im_names_desc.set_description(
                "det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}"
                .format(
                    dt=np.mean(runtime_profile["dt"]),
                    pt=np.mean(runtime_profile["pt"]),
                    pn=np.mean(runtime_profile["pn"]),
                ))

    print("===========================> Finish Model Running.")
    if (args.save_img or args.video_savefile) and not args.vis_fast:
        print("===========================> Rendering remaining images in the queue...")
        print("===========================> If this step takes too long, you can enable "
              "the --vis_fast flag to use fast rendering (real-time).")
    while writer.running():
        pass
    writer.stop()
    return writer.results()
def detect_main(args, im_names, yolo_model, pose_net):
    # Load input images
    data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo').start()

    # Load detection loader
    det_loader = DetectionLoader(data_loader, model=yolo_model, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    # writer = DataWriter(args.save_video).start()

    data_len = data_loader.length()
    fall_res_all = []
    batchSize = args.posebatch
    for i in range(data_len):
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation
            # print(im_name)
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_net(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu()
            # writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1])

            fall_res = []
            fall_res.append(im_name.split('/')[-1])
            if boxes is None:
                cv2.imwrite(opt.outputpath + '/' + im_name.split('/')[-1], orig_img)
            else:
                if opt.matching:
                    preds = getMultiPeakPrediction(
                        hm, pt1.numpy(), pt2.numpy(), opt.inputResH,
                        opt.inputResW, opt.outputResH, opt.outputResW)
                    result = matching(boxes, scores.numpy(), preds)
                else:
                    preds_hm, preds_img, preds_scores = getPrediction(
                        hm, pt1, pt2, opt.inputResH, opt.inputResW,
                        opt.outputResH, opt.outputResW)
                    result = pose_nms(boxes, scores, preds_img, preds_scores)
                result = {'imgname': im_name, 'result': result}
                img = vis_frame(orig_img, result)

                for human in result['result']:
                    keypoint = human['keypoints']
                    keypoint = keypoint.numpy()
                    xmax = max(keypoint[:, 0])
                    xmin = min(keypoint[:, 0])
                    ymax = max(keypoint[:, 1])
                    ymin = min(keypoint[:, 1])
                    w = xmax - xmin
                    h = ymax - ymin
                    distance = abs((keypoint[15][1] + keypoint[16][1]) / 2 -
                                   (keypoint[11][1] + keypoint[12][1]) / 2)
                    if w / h >= 0.95:
                        cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2)
                        font = cv2.FONT_HERSHEY_SIMPLEX
                        cv2.putText(img, 'Warning!Fall', (int(xmin + 10), int(ymax - 10)),
                                    font, 1, (0, 0, 255), 2)
                        fall_res.append([xmin, ymin, xmax, ymax])
                        '''
                        print('1 location:[%f,' % (xmin) + '%f]' % (ymin) + ' [%f,' % (xmax) + '%f]' % (
                            ymin) + ' [%f,' % (xmin) + '%f]' % (ymax) + ' [%f,' % (xmax) + '%f]' % (ymax))
                        '''
                    else:
                        if distance < 55:
                            cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 2)
                            font = cv2.FONT_HERSHEY_SIMPLEX
                            cv2.putText(img, 'Warning!Fall!', (int(xmin + 10), int(ymax - 10)),
                                        font, 1, (0, 255, 0), 2)
                            fall_res.append(1)
                            fall_res.append([xmin, ymin, xmax, ymax])
                            '''
                            print('1 location:[%f,' % (xmin) + '%f]' % (ymin) + ' [%f,' % (xmax) + '%f]' % (
                                ymin) + ' [%f,' % (xmin) + '%f]' % (ymax) + ' [%f,' % (xmax) + '%f]' % (ymax))
                            '''
                        else:
                            cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 2)
                # cv2.imwrite(os.path.join(opt.outputpath, 'vis', im_name), img)
                print(fall_res)
                cv2.imwrite(opt.outputpath + '/' + im_name.split('/')[-1], img)

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)
            fall_res_all.append(fall_res)

    return fall_res_all
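# The fall rule above flags a person when the keypoint bounding box is wider
# than it is tall (w / h >= 0.95) or when the ankle and hip midpoints are
# vertically close (distance < 55 px). A hedged standalone version of that
# heuristic (is_fallen is illustrative; indices follow COCO: 11/12 hips,
# 15/16 ankles):
import numpy as np

def is_fallen(keypoints, ratio_thr=0.95, dist_thr=55.0):
    """keypoints: (17, 2) array of COCO keypoints in pixels."""
    w = keypoints[:, 0].max() - keypoints[:, 0].min()
    h = keypoints[:, 1].max() - keypoints[:, 1].min()
    ankle_y = (keypoints[15, 1] + keypoints[16, 1]) / 2
    hip_y = (keypoints[11, 1] + keypoints[12, 1]) / 2
    return w / h >= ratio_thr or abs(ankle_y - hip_y) < dist_thr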
def run():
    ret = []
    # for i in im_names_desc:
    #     try:
    start_time = getTime()
    with torch.no_grad():
        (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
        if boxes is None or boxes.nelement() == 0:
            writer.save(None, None, None, None, None, orig_img,
                        im_name.split('/')[-1])
            # continue
        ckpt_time, det_time = getTime(start_time)
        runtime_profile['dt'].append(det_time)

        # Pose Estimation
        if inps is None:
            writer.save(boxes, scores, [], pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            return []
        datalen = inps.size(0)
        leftover = 0
        if datalen % batchSize:
            leftover = 1
        num_batches = datalen // batchSize + leftover
        hm = []
        for j in range(num_batches):
            inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
            hm_j = pose_model(inps_j)
            hm.append(hm_j)
        hm = torch.cat(hm)
        ckpt_time, pose_time = getTime(ckpt_time)
        runtime_profile['pt'].append(pose_time)
        hm = hm.cpu().data

        # new code
        preds_hm, preds_img, preds_scores = getPrediction(
            hm, pt1, pt2, opt.inputResH, opt.inputResW,
            opt.outputResH, opt.outputResW)
        result = pose_nms(boxes, scores, preds_img, preds_scores)
        print("\n")
        # keypoint index -> label (index 17 is the neck, appended below as the
        # midpoint of the shoulders)
        names = {0: 'nose', 1: 'left eye', 2: 'right eye', 3: 'left ear',
                 4: 'right ear', 5: 'left shoulder', 6: 'right shoulder',
                 17: 'neck'}
        for human in result:
            kp_preds = human['keypoints']
            kp_scores = human['kp_score']
            kp_preds = torch.cat(
                (kp_preds, torch.unsqueeze((kp_preds[5, :] + kp_preds[6, :]) / 2, 0)))
            kp_scores = torch.cat(
                (kp_scores, torch.unsqueeze((kp_scores[5, :] + kp_scores[6, :]) / 2, 0)))
            for n in range(kp_scores.shape[0]):
                if kp_scores[n] <= 0.05:
                    continue
                cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
                if n in names:
                    print(names[n] + ': ' + str(cor_x) + ', ' + str(cor_y))
                    ret.append([n, cor_x, cor_y])

        writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                    im_name.split('/')[-1])
        ckpt_time, post_time = getTime(ckpt_time)
        runtime_profile['pn'].append(post_time)

    if args.profile:  # TQDM
        im_names_desc.set_description(
            'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}'
            .format(dt=np.mean(runtime_profile['dt']),
                    pt=np.mean(runtime_profile['pt']),
                    pn=np.mean(runtime_profile['pn'])))
    # except KeyboardInterrupt:
    #     break
    return ret
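# The loop above appends a pseudo-keypoint (index 17, the neck) as the
# midpoint of the two shoulders (COCO indices 5 and 6). A minimal shape check
# of that construction:
import torch

kp = torch.rand(17, 2)
kp = torch.cat((kp, torch.unsqueeze((kp[5, :] + kp[6, :]) / 2, 0)))
assert kp.shape == (18, 2)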
def parse(self):
    if not os.path.exists(self.output_path):
        os.mkdir(self.output_path)

    data_loader = WebcamLoader(self.camera_info.videoAddress).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=self.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()
    aligner = AlignPoints()

    # Data writer
    # save_path = os.path.join(args.outputpath, 'AlphaPose_webcam' + webcam + '.avi')
    writer = DataWriter(self.save_video, self.output_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize,
                        pos_reg_model=pos_reg_model, aligner=aligner).start()

    # for timing statistics
    runtime_profile = {'dt': [], 'pt': [], 'pn': []}
    sys.stdout.flush()
    batch_size = self.detbatch
    while True:
        try:
            start_time = getTime()
            with torch.no_grad():
                (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
                if boxes is None or boxes.nelement() == 0:
                    writer.save(None, None, None, None, None, orig_img,
                                im_name.split('/')[-1])
                    continue
                ckpt_time, det_time = getTime(start_time)
                runtime_profile['dt'].append(det_time)

                # Pose Estimation
                datalen = inps.size(0)
                leftover = 0
                if datalen % batch_size:
                    leftover = 1
                num_batches = datalen // batch_size + leftover
                hm = []
                for j in range(num_batches):
                    inps_j = inps[j * batch_size:min((j + 1) * batch_size, datalen)].cuda()
                    hm_j = pose_model(inps_j)
                    hm.append(hm_j)
                hm = torch.cat(hm)
                ckpt_time, pose_time = getTime(ckpt_time)

                hm = hm.cpu().data
                writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                            im_name.split('/')[-1])

                while not writer.result_Q.empty():
                    boxes, classidx = writer.result_Q.get()
                    print('classidx:', classidx)
                    # iterate over the elderly residents of the room this camera
                    # watches; currently assumes only one person per room
                    for aged in self.camera.roomInfo.agesInfos:
                        if aged.id not in ages.keys():
                            ages[aged.id] = PoseInfo(
                                agesInfoId=aged.id,
                                date=time.strftime('%Y-%m-%dT00:00:00', time.localtime()),
                                timeStand=0,
                                timeSit=0,
                                timeLie=0,
                                timeDown=0,
                                timeOther=0)
                        # update the accumulated time of each state for the monitored person
                        pose_detect_with_video(aged.id, classidx)
                        break
                    # create or update the PoseInfo database record
                    pose_url = Conf.Urls.PoseInfoUrl + '/UpdateOrCreatePoseInfo'
                    HttpHelper.create_item(pose_url, ages[aged.id])

                ckpt_time, post_time = getTime(ckpt_time)
        except KeyboardInterrupt:
            break

    while writer.running():
        pass
    writer.stop()
def main(file_name):
    # videofile = args.video
    videofile = file_name
    mode = args.mode
    if not os.path.exists(args.outputpath):
        os.mkdir(args.outputpath)
    if not len(videofile):
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:  # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('===========================> Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print('===========================> Rendering remaining images in the queue...')
        print('===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).')
    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)
def update(self):
    next_id = 0
    car_next_id = 0
    bbox_dets_list_list = []
    keypoints_list_list = []
    car_dets_list_list = []
    while True:
        # if the thread indicator variable is set, stop the thread
        if self.stopped:
            if self.save_video:
                self.stream.release()
            return
        # otherwise, ensure the queue is not empty
        if not self.Q.empty():
            start_time = getTime()
            (boxes, scores, hm_data, pt1, pt2, orig_img, img_id, CAR) = self.Q.get()
            orig_img = np.array(orig_img, dtype=np.uint8)
            if boxes is not None:
                boxes = boxes.astype(np.int32)
            img = orig_img
            # text_filled2(img, (5, 200), str(img_id), LIGHT_GREEN, 2, 2)

            bbox_dets_list = []  # keyframe: start from empty
            keypoints_list = []  # keyframe: start from empty
            # print(boxes)
            if boxes is None:  # no person detections
                pass
                # bbox_det_dict = {"img_id": img_id,
                #                  "det_id": 0,
                #                  "track_id": None,
                #                  "bbox": [0, 0, 2, 2]}
                # bbox_dets_list.append(bbox_det_dict)
                #
                # keypoints_dict = {"img_id": img_id,
                #                   "det_id": 0,
                #                   "track_id": None,
                #                   "keypoints": []}
                # keypoints_list.append(keypoints_dict)
            else:
                if opt.matching:
                    preds = getMultiPeakPrediction(
                        hm_data, pt1.numpy(), pt2.numpy(), opt.inputResH,
                        opt.inputResW, opt.outputResH, opt.outputResW)
                    result = matching(boxes, scores.numpy(), preds)
                else:
                    preds_hm, preds_img, preds_scores = getPrediction(
                        hm_data, pt1, pt2, opt.inputResH, opt.inputResW,
                        opt.outputResH, opt.outputResW)
                    # print('number of result', preds_hm, preds_scores)
                    result = pose_nms(boxes, scores, preds_img, preds_scores)  # list type
                    # result = { 'keypoints': , 'kp_score': , 'proposal_score': , 'bbox' }

                if img_id > 0:  # first frame has no previous frame
                    bbox_list_prev_frame = bbox_dets_list_list[img_id - 1].copy()
                    keypoints_list_prev_frame = keypoints_list_list[img_id - 1].copy()
                else:
                    bbox_list_prev_frame = []
                    keypoints_list_prev_frame = []

                num_dets = len(result)  # boxes.size(0)
                for bbox in boxes:
                    x, y, w, h = bbox.astype(np.uint32)
                    cv2.rectangle(orig_img, (x, y), (x + w, y + h), (253, 222, 111), 1)

                for det_id in range(num_dets):
                    # IOU tracking for detections in the current frame:
                    # obtain bbox position and track id
                    result_box = result[det_id]
                    kp_score = result_box['kp_score']
                    proposal_score = result_box['proposal_score'].numpy()[0]
                    if proposal_score < 1.3:
                        continue

                    keypoints = result_box['keypoints']  # torch, (17, 2)
                    keypoints_pf = np.zeros((15, 2))
                    idx_list = [16, 14, 12, 11, 13, 15, 10, 8, 6, 5, 7, 9, 0, 0, 0]
                    for i, idx in enumerate(idx_list):
                        keypoints_pf[i] = keypoints[idx]
                    keypoints_pf[12] = (keypoints[5] + keypoints[6]) / 2  # neck
                    # COCO order:     {0-nose 1-Leye 2-Reye 3-Lear 4-Rear 5-Lsho 6-Rsho 7-Lelb 8-Relb 9-Lwri 10-Rwri 11-Lhip 12-Rhip 13-Lkne 14-Rkne 15-Lank 16-Rank}
                    # PoseFlow order: {0-Rank 1-Rkne 2-Rhip 3-Lhip 4-Lkne 5-Lank 6-Rwri 7-Relb 8-Rsho 9-Lsho 10-Lelb 11-Lwri 12-neck 13-nose 14-TopHead}

                    bbox_det = bbox_from_keypoints(keypoints)  # xxyy
                    # bbox_in_xywh = enlarge_bbox(bbox_det, enlarge_scale)
                    # bbox_det = x1y1x2y2_to_xywh(bbox_in_xywh)

                    # Keyframe: use provided bbox
                    # if bbox_invalid(bbox_det):
                    #     track_id = None  # this id means null
                    #     keypoints = []
                    #     bbox_det = [0, 0, 2, 2]
                    #     # update current frame bbox
                    #     bbox_det_dict = {"img_id": img_id, "det_id": det_id,
                    #                      "track_id": track_id, "bbox": bbox_det}
                    #     bbox_dets_list.append(bbox_det_dict)
                    #     # update current frame keypoints
                    #     keypoints_dict = {"img_id": img_id, "det_id": det_id,
                    #                       "track_id": track_id, "keypoints": keypoints}
                    #     keypoints_list.append(keypoints_dict)
                    #     continue

                    if img_id == 0:  # first frame: all ids are assigned automatically
                        track_id = next_id
                        next_id += 1
                    else:
                        track_id, match_index = get_track_id_SpatialConsistency(
                            bbox_det, bbox_list_prev_frame)
                        # print('track', track_id, match_index)
                        if track_id != -1:
                            # candidate from prev frame matched: prevent it from matching another
                            del bbox_list_prev_frame[match_index]
                            del keypoints_list_prev_frame[match_index]

                    # update current frame bbox
                    bbox_det_dict = {"img_id": img_id,
                                     "det_id": det_id,
                                     "track_id": track_id,
                                     "bbox": bbox_det}
                    # update current frame keypoints
                    keypoints_dict = {"img_id": img_id,
                                      "det_id": det_id,
                                      "track_id": track_id,
                                      "keypoints": keypoints,
                                      'kp_poseflow': keypoints_pf,
                                      'kp_score': kp_score,
                                      'bbox': bbox_det,
                                      'proposal_score': proposal_score}
                    bbox_dets_list.append(bbox_det_dict)
                    keypoints_list.append(keypoints_dict)

                num_dets = len(bbox_dets_list)
                for det_id in range(num_dets):
                    # if IOU tracking failed, run pose-matching tracking
                    bbox_det_dict = bbox_dets_list[det_id]
                    keypoints_dict = keypoints_list[det_id]
                    # assert (det_id == bbox_det_dict["det_id"])
                    # assert (det_id == keypoints_dict["det_id"])

                    if bbox_det_dict["track_id"] == -1:  # this id means no match was found yet
                        track_id, match_index = get_track_id_SGCN(
                            bbox_det_dict["bbox"], bbox_list_prev_frame,
                            keypoints_dict["kp_poseflow"], keypoints_list_prev_frame)
                        if track_id != -1:
                            # candidate from prev frame matched: prevent it from matching another
                            del bbox_list_prev_frame[match_index]
                            del keypoints_list_prev_frame[match_index]
                            bbox_det_dict["track_id"] = track_id
                            keypoints_dict["track_id"] = track_id

                        # if still no match from the previous frame, assign a new id
                        # if track_id == -1 and not bbox_invalid(bbox_det_dict["bbox"]):
                        if track_id == -1:
                            bbox_det_dict["track_id"] = next_id
                            keypoints_dict["track_id"] = next_id
                            next_id += 1

            # update frame
            # print('keypoint list', len(keypoints_list))
            vis_frame(img, keypoints_list)

            """ Car """
            if CAR is not None:
                car_np = CAR
                new_car_bboxs = car_np[:, 0:4].astype(np.uint32)  # b / x y w h c / cls_conf, cls_idx
                new_car_score = car_np[:, 4]
                cls_conf = car_np[:, 4]
                # print("id:", img_id, "------------", new_car_bboxs, new_car_score)
                # cls_conf = car_np[:, 6]
                car_dest_list = []
                if img_id > 1:  # first frame has no previous frame
                    car_bbox_list_prev_frame = car_dets_list_list[img_id - 1].copy()
                else:
                    car_bbox_list_prev_frame = []
                # print('car bbox list prev frame', len(car_bbox_list_prev_frame))

                for c, score, conf in zip(new_car_bboxs, new_car_score, cls_conf):
                    # car_bbox_det = c
                    # car_bbox_det = x1y1x2y2_to_xywh(c)
                    bbox_det = c
                    # bbox_in_xywh = enlarge_bbox(car_bbox_det, enlarge_scale)
                    # bbox_det = x1y1x2y2_to_xywh(bbox_in_xywh)
                    if img_id == 0:  # first frame: all ids are assigned automatically
                        car_track_id = car_next_id
                        car_next_id += 1
                    else:
                        car_track_id, match_index = get_track_id_SpatialConsistency(
                            bbox_det, car_bbox_list_prev_frame)
                        # print(car_track_id, match_index)
                        if car_track_id != -1:
                            # candidate from prev frame matched: prevent it from matching another
                            del car_bbox_list_prev_frame[match_index]

                    bbox_det_dict = {"img_id": img_id,
                                     "track_id": car_track_id,
                                     "bbox": bbox_det,
                                     "score": score,
                                     "conf": conf}
                    car_dest_list.append(bbox_det_dict)

                for car_bbox_det_dict in car_dest_list:  # detections for current frame
                    if car_bbox_det_dict["track_id"] == -1:  # no match was found yet
                        car_bbox_det_dict["track_id"] = car_next_id
                        car_next_id += 1

                self.tracking(car_dest_list)
                car_dets_list_list.append(car_dest_list)
            else:
                car_dest_list = []
                bbox_det_dict = {"img_id": img_id,
                                 "det_id": 0,
                                 "track_id": None,
                                 "bbox": [0, 0, 2, 2],
                                 "score": 0,
                                 "conf": 0}
                car_dest_list.append(bbox_det_dict)
                car_dets_list_list.append(car_dest_list)

            bbox_dets_list_list.append(bbox_dets_list)
            keypoints_list_list.append(keypoints_list)

            if img_id != 0:
                self.car_person_detection(car_dest_list, bbox_dets_list, img)
                self.car_parking_detection(car_dest_list, img, img_id)

            ckpt_time, det_time = getTime(start_time)
            cv2.putText(img, str(1 / det_time), (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 1)

            if opt.vis:
                cv2.imshow("AlphaPose Demo", img)
                cv2.waitKey(33)
            if opt.save_video:
                self.stream.write(img)
        else:
            time.sleep(0.1)
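# get_track_id_SpatialConsistency (used for both persons and cars above)
# matches a detection against the previous frame by spatial overlap and
# returns (-1, -1) when nothing matches, after which a new id is assigned. A
# hedged IOU-based sketch of the idea; iou_match is illustrative, not the
# repo's implementation, and assumes (x1, y1, x2, y2) boxes:
def iou(a, b):
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def iou_match(bbox, prev_dets, thresh=0.3):
    """Return (track_id, match_index), or (-1, -1) if no box overlaps enough."""
    best, best_idx = thresh, -1
    for i, det in enumerate(prev_dets):
        score = iou(bbox, det["bbox"])
        if score > best:
            best, best_idx = score, i
    if best_idx == -1:
        return -1, -1
    return prev_dets[best_idx]["track_id"], best_idx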
def Alphapose( im_names, pose_model, ): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Load input images data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo').start() # Load detection loader sys.stdout.flush() det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start() det_processor = DetectionProcessor(det_loader).start() runtime_profile = {'dt': [], 'pt': [], 'pn': []} # Init data writer writer = DataWriter(args.save_video).start() data_len = data_loader.length() im_names_desc = tqdm(range(data_len)) batchSize = args.posebatch for i in im_names_desc: start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read() if boxes is None or boxes.nelement() == 0: writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].to(device) hm_j = pose_model(inps_j) hm.append(hm_j) hm = torch.cat(hm) ckpt_time, pose_time = getTime(ckpt_time) runtime_profile['pt'].append(pose_time) hm = hm.cpu() writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1]) ckpt_time, post_time = getTime(ckpt_time) runtime_profile['pn'].append(post_time) if args.profile: # TQDM im_names_desc.set_description( 'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}' .format(dt=np.mean(runtime_profile['dt']), pt=np.mean(runtime_profile['pt']), pn=np.mean(runtime_profile['pn']))) print('Finish Model Running.') if (args.save_img or args.save_video) and not args.vis_fast: print( '===========================> Rendering remaining images in the queue...' ) print( '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).' ) while (writer.running()): pass writer.stop() final_result = writer.results() # write_json(final_result, args.outputpath) if final_result[0]['result']: return final_result[0]['result'][0]['keypoints'] else: return None