im_dim_list = im_dim_list.cuda() prediction = det_model(img, CUDA=True) ckpt_time, det_time = getTime(ckpt_time) runtime_profile['dt'].append(det_time) # NMS process dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True, nms_conf=opt.nms_thesh) if isinstance(dets, int) or dets.shape[0] == 0: writer.save(None, None, None, None, None, orig_img, im_name=str(i) + '.jpg') continue im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer dets[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
def _middle_person_index(preds):
    """Return the index of the person that is neither leftmost nor rightmost.

    The mean of the first keypoint column (read as x) is computed once per
    person. The person with the smallest mean and the one with the largest
    mean are excluded; the last remaining index is returned. When nobody
    remains (one or two people), index 0 is returned.

    Args:
        preds: non-empty sequence of dicts whose ``'keypoints'`` entry
            supports ``.data.numpy()`` (per the original comments, shape
            (17, 2)).

    Raises:
        ValueError: if ``preds`` is empty.
    """
    mean_xs = [np.mean(p['keypoints'].data.numpy()[:, 0]) for p in preds]
    indices = range(len(mean_xs))
    # min()/max() keep the first extreme on ties, like a strict </> scan.
    leftmost = min(indices, key=mean_xs.__getitem__)
    rightmost = max(indices, key=mean_xs.__getitem__)

    middle = 0
    for k in indices:
        if k != leftmost and k != rightmost:
            middle = k
    return middle


def handle_video(videofile):
    """Run detection and pose estimation on a video and collect keypoints.

    For every frame with results, the keypoints of the "middle" person
    (see ``_middle_person_index``) are collected. Frames whose result cannot
    be read are reported with ``'error...'`` and skipped.

    Args:
        videofile: path of the input video; also stored in ``args.video``.

    Returns:
        ``np.float32`` array built from the collected per-frame keypoints.

    Raises:
        IOError: if ``videofile`` is empty.
    """
    import time

    args.video = videofile
    if not videofile:
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (_, fps, frameSize) = data_loader.videoinfo()
    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    # start a thread to read frames from the file video stream
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: forward the bare frame to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation, in chunks of at most `batchSize` crops
            datalen = inps.size(0)
            heatmaps = []
            for lo in range(0, datalen, batchSize):
                heatmaps.append(pose_model(inps[lo:lo + batchSize].cuda()))
            hm = torch.cat(heatmaps)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Poll with a short sleep instead of spinning, so this thread does not
    # burn a core while the writer is still running.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()

    kpts = []
    for frame_result in final_result:
        try:
            preds = frame_result['result']
            # preds[k]['keypoints'] (17,2)
            # preds[k]['kp_score'] (17,1)
            # preds[k]['proposal_score'] (1)
            kpt = preds[_middle_person_index(preds)]['keypoints']
            kpts.append(kpt.data.numpy())
        except Exception:
            # Best effort: skip frames without a usable result, but let
            # KeyboardInterrupt/SystemExit propagate.
            print('error...')

    return np.array(kpts).astype(np.float32)
'pn': [] } # Init data writer writer = DataWriter(args.save_video).start() data_len = data_loader.length() im_names_desc = tqdm(range(data_len)) batchSize = args.posebatch for i in im_names_desc: start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read() if boxes is None or boxes.nelement() == 0: writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j*batchSize:min((j + 1)*batchSize, datalen)].cuda() hm_j = pose_model(inps_j)
def test():
    """Run detection and pose estimation over the images in ``args.inputpath``.

    Only the files directly inside ``args.inputpath`` are used. They are
    ordered by the integer value of their file stem, so every file name must
    look like ``<int>.<ext>``.

    NOTE(review): ``pose_model`` is not created here; the function relies on
    a name ``pose_model`` being resolvable from the enclosing scope. Confirm
    it is defined at module level before calling.

    Returns:
        The list returned by ``writer.results()``; it is also passed to
        ``write_json`` together with ``args.outputpath``.

    Raises:
        IOError: if ``args.inputpath`` is not an existing directory.
        ValueError: if a file stem is not an integer.
    """
    import time

    inputpath = args.inputpath
    if not os.path.isdir(inputpath):
        raise IOError('Error: must contain either --indir/--list')

    # Take the top-level directory only. Looping over os.walk() and
    # re-assigning on every iteration kept the file list of whichever
    # sub-directory happened to be visited last.
    _, _, im_names = next(os.walk(inputpath), (inputpath, [], []))
    im_names = sorted(im_names, key=lambda x: int(os.path.splitext(x)[0]))
    print(im_names)

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=1, format='yolo').start()

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=1).start()
    det_processor = DetectionProcessor(det_loader).start()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(args.save_video).start()

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: forward the bare image to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation, in chunks of at most `batchSize` crops
            datalen = inps.size(0)
            heatmaps = []
            for lo in range(0, datalen, batchSize):
                heatmaps.append(pose_model(inps[lo:lo + batchSize].cuda()))
            hm = torch.cat(heatmaps)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('===========================> Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Poll with a short sleep instead of spinning on writer.running().
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)

    return final_result
def call_alphapose(input_dir, output_dir, format='open', batchSize=1):
    """Run detection and pose estimation on the images in ``input_dir``.

    Results are written with ``write_json(..., _format=format)`` into
    ``output_dir``, then ``correct_json_save(output_dir)`` is called.

    Args:
        input_dir: directory whose top-level files are used as input images.
        output_dir: directory for the JSON output; created if missing,
            including intermediate directories.
        format: forwarded to ``write_json`` as ``_format``.
        batchSize: batch size for the image loader, the detection loader and
            the pose-model chunks.

    Raises:
        IOError: if ``input_dir`` is not an existing directory.
    """
    import time

    if not os.path.isdir(input_dir):
        raise IOError(
            'Error: input_dir is not a directory: {}'.format(input_dir))
    os.makedirs(output_dir, exist_ok=True)

    # Take the top-level directory only. Looping over os.walk() and
    # re-assigning on every iteration kept the file list of whichever
    # sub-directory happened to be visited last.
    _, _, im_names = next(os.walk(input_dir), (input_dir, [], []))
    print(im_names)

    data_loader = ImageLoader(im_names,
                              batchSize=batchSize,
                              format='yolo',
                              dir_path=input_dir).start()
    det_loader = DetectionLoader(data_loader, batchSize=batchSize).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(False).start()

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: forward the bare image to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation, in chunks of at most `batchSize` crops
            datalen = inps.size(0)
            heatmaps = []
            for lo in range(0, datalen, batchSize):
                heatmaps.append(pose_model(inps[lo:lo + batchSize].cuda()))
            hm = torch.cat(heatmaps)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

    # Poll with a short sleep instead of spinning on writer.running().
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, output_dir, _format=format)
    correct_json_save(output_dir)
    print('Over')
def handle_video(video_file):
    """Run detection and pose estimation on a video and dump the result as JSON.

    The output directory is ``outputs/alpha_pose_<video_name>``. If it
    already exists, its ``vis`` sub-directory is removed; otherwise the
    directory is created together with any missing parents. The pose model
    and its inputs are left on the CPU (no ``.cuda()`` calls).

    Args:
        video_file: path of the input video; also stored in ``args.video``.

    Returns:
        Tuple ``(final_result, video_name)``: the list from
        ``writer.results()`` and the file name without its extension.

    Raises:
        IOError: if ``video_file`` is empty. This is checked before any
            directory is touched.
    """
    import time

    if not video_file:
        raise IOError('Error: must contain --video')

    args.video = video_file
    # splitext() keeps the full name when there is no extension; slicing at
    # rfind('.') dropped the last character in that case.
    video_name = os.path.splitext(os.path.basename(video_file))[0]

    args.outputpath = f'outputs/alpha_pose_{video_name}'
    if os.path.exists(args.outputpath):
        shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    else:
        # makedirs also creates the parent `outputs/` directory if needed.
        os.makedirs(args.outputpath)

    # Load input video
    data_loader = VideoLoader(video_file, batchSize=args.detbatch).start()
    (_, fps, _) = data_loader.videoinfo()
    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    # start a thread to read frames from the file video stream
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model (kept on the CPU)
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    writer = DataWriter(args.save_video).start()

    print('Start pose estimation...')
    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                print(f'{i}-th image read None: handle_video')
                break
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: forward the bare frame to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation, in chunks of at most `batchSize` crops
            datalen = inps.size(0)
            heatmaps = []
            for lo in range(0, datalen, batchSize):
                heatmaps.append(pose_model(inps[lo:lo + batchSize]))
            hm = torch.cat(heatmaps)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Poll with a short sleep instead of spinning on writer.running().
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)

    return final_result, video_name
im_names_desc = tqdm(range(data_loader.length())) batchSize = args.posebatch for i in range(data_loader.length()): if opt.profile: im_names_desc.update(1) start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2, CAR) = det_processor.read() if orig_img is None: break # if boxes is None or boxes.nelement() == 0: if boxes is None: writer.save(None, None, None, None, None, orig_img, im_name, CAR) continue ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
yolo_end = time.time() _yolo_delta = yolo_end - yolo_start #print('######## YOLO_time FPS: %f:'%(1/_yolo_delta)) # ################### ckpt_time, det_time = getTime(ckpt_time) runtime_profile['dt'].append(det_time) # NMS process dets = dynamic_write_results(prediction, args.confidence, args.num_classes, nms=True, nms_conf=args.nms_thesh) if isinstance(dets, int) or dets.shape[0] == 0: writer.save(None, orig_img=orig_img, im_name=str(i) + '.jpg') continue im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer dets[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 dets[:, 1:5] /= scaling_factor for j in range(dets.shape[0]):
im_dim_list = im_dim_list.cuda() prediction = det_model(img, CUDA=True) ckpt_time, det_time = getTime(ckpt_time) runtime_profile['dt'].append(det_time) # NMS process dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True, nms_conf=opt.nms_thesh) if isinstance(dets, int) or dets.shape[0] == 0: writer.save(None, None, None, None, None, orig_img, im_name=int(i)) continue im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long()) scaling_factor = torch.min(det_inp_dim / im_dim_list, 1)[0].view(-1, 1) # coordinate transfer dets[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 dets[:, [2, 4]] -= (det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 dets[:, 1:5] /= scaling_factor
def main(file_name):
    """Run detection and pose estimation on a video and write the JSON result.

    Output goes to ``args.outputpath``, which is created if missing
    (including intermediate directories).

    Args:
        file_name: path of the input video.

    Raises:
        IOError: if ``file_name`` is empty. This is checked before the
            output directory is created.
    """
    import time

    videofile = file_name
    if not videofile:
        raise IOError('Error: must contain --video')
    os.makedirs(args.outputpath, exist_ok=True)

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (_, fps, frameSize) = data_loader.videoinfo()

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: forward the bare frame to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation, in chunks of at most `batchSize` crops
            datalen = inps.size(0)
            heatmaps = []
            for lo in range(0, datalen, batchSize):
                heatmaps.append(pose_model(inps[lo:lo + batchSize].cuda()))
            hm = torch.cat(heatmaps)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            # The ipdb.set_trace() call that stopped here on every frame
            # was a debugging leftover and has been removed.
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('===========================> Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Poll with a short sleep instead of spinning on writer.running().
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)
def handle_video(videofile, no_nan=True):
    """Run detection and pose estimation on a video and collect keypoints.

    For each entry of ``writer.results()`` the keypoints of the first
    person (``['result'][0]['keypoints']``) are taken. Entries whose
    keypoints cannot be read are reported with ``'error...'``.

    Args:
        videofile: path of the input video; also stored in ``args.video``.
        no_nan: when true, the output only has rows for frames whose
            keypoints could be read. When false, the output has one
            ``(17, 2)`` row per frame read, pre-filled with NaN so gaps can
            be interpolated later. This relies on the i-th writer result
            belonging to the i-th frame that had detections.

    Returns:
        Tuple ``(kpts, fps, cam_w, cam_h)``: a ``np.float32`` keypoint
        array, the frame rate, and the two entries of ``frameSize`` reported
        by the video loader.

    Raises:
        IOError: if ``videofile`` is empty.
    """
    import time

    args.video = videofile
    if not videofile:
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (_, fps, frameSize) = data_loader.videoinfo()
    cam_w = frameSize[0]
    cam_h = frameSize[1]
    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    # start a thread to read frames from the file video stream
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    frames_w_pose = []  # 0-based indices of frames that had detections
    frame_cnt = 0       # number of frames actually read
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                break
            frame_cnt += 1
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: forward the bare frame to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            frames_w_pose.append(frame_cnt - 1)

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation, in chunks of at most `batchSize` crops
            datalen = inps.size(0)
            heatmaps = []
            for lo in range(0, datalen, batchSize):
                heatmaps.append(pose_model(inps[lo:lo + batchSize].cuda()))
            hm = torch.cat(heatmaps)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Poll with a short sleep instead of spinning on writer.running().
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()

    kpts = []
    if not no_nan:
        # initialize to NaN so we can interpolate later
        kpts = [
            np.full((17, 2), np.nan, dtype=np.float32)
            for _ in range(frame_cnt)
        ]

    for i, frame_result in enumerate(final_result):
        try:
            kpt = frame_result['result'][0]['keypoints']
            if no_nan:
                kpts.append(kpt.data.numpy())
            else:
                kpts[frames_w_pose[i]] = kpt.data.numpy()
        except Exception:
            # Best effort: skip unusable results, but let
            # KeyboardInterrupt/SystemExit propagate.
            print('error...')

    kpts = np.array(kpts).astype(np.float32)
    return kpts, fps, cam_w, cam_h
dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) boxes = dets[:, 1:5].cpu() scores = dets[:, 5:6].cpu() ckpt_time, detNMS_time = getTime(ckpt_time) runtime_profile['dn'].append(detNMS_time) # Pose Estimation inps, pt1, pt2 = crop_from_dets(inp[0], boxes) inps = Variable(inps.cuda()) hm = pose_model(inps) ckpt_time, pose_time = getTime(ckpt_time) runtime_profile['pt'].append(pose_time) writer.save(boxes, scores, hm.cpu().data, pt1, pt2, np.array(orig_img[0], dtype=np.uint8), im_name[0].split('/')[-1]) #writer.save(boxes, scores, hm.cpu().data, pt1, pt2, orig_img[0], im_name[0].split('/')[-1]) ckpt_time, post_time = getTime(ckpt_time) runtime_profile['pn'].append(post_time) # TQDM im_names_desc.set_description( 'det time: {dt:.3f} | det NMS: {dn:.4f} | pose time: {pt:.2f} | post processing: {pn:.4f}' .format(dt=np.mean(runtime_profile['dt']), dn=np.mean(runtime_profile['dn']), pt=np.mean(runtime_profile['pt']), pn=np.mean(runtime_profile['pn']))) ''' # TQDM im_names_desc.set_description( 'Speed: {speed:.2f}'.format(
def Alphapose(
    im_names,
    pose_model,
):
    """Run detection and pose estimation and return the first keypoints found.

    Args:
        im_names: image names handed to ``ImageLoader``.
        pose_model: callable pose network. Input crops are moved to CUDA
            when available, otherwise CPU; the model itself is not moved
            here.

    Returns:
        ``final_result[0]['result'][0]['keypoints']``, or ``None`` when the
        writer produced no results or the first result list is empty.
    """
    import time

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=args.detbatch,
                              format='yolo').start()

    # Load detection loader
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(args.save_video).start()

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: forward the bare image to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation, in chunks of at most `batchSize` crops
            datalen = inps.size(0)
            heatmaps = []
            for lo in range(0, datalen, batchSize):
                heatmaps.append(pose_model(inps[lo:lo + batchSize].to(device)))
            hm = torch.cat(heatmaps)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Poll with a short sleep instead of spinning on writer.running().
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()

    # Guard against an empty result list before indexing entry 0.
    if final_result and final_result[0]['result']:
        return final_result[0]['result'][0]['keypoints']
    return None
class AlphaPose():
    """Video pose-estimation pipeline that also extracts arm joint positions.

    Constructing an instance starts the video loader, the detection loader,
    the detection processor and the data writer. ``run()`` builds the pose
    model, processes every frame and stores the writer's results in
    ``self.results``.
    """

    def __init__(self, videofile, mode='normal'):
        """Start the loaders and the writer for ``videofile``.

        Args:
            videofile: path of the input video.
            mode: value copied into ``args.mode`` when ``run()`` is called.
        """
        self.videofile = videofile
        self.mode = mode
        self.data_loader = VideoLoader(self.videofile,
                                       batchSize=args.detbatch).start()
        (fourcc, fps, frameSize) = self.data_loader.videoinfo()
        self.fourcc = fourcc
        self.fps = fps
        self.frameSize = frameSize
        self.det_loader = DetectionLoader(self.data_loader,
                                          batchSize=args.detbatch).start()
        self.det_processor = DetectionProcessor(self.det_loader).start()
        self.pose_dataset = Mscoco()
        save_path = os.path.join(
            args.outputpath,
            'AlphaPose_' + ntpath.basename(self.videofile).split('.')[0] +
            '.mp4')
        self.writer = DataWriter(args.save_video, save_path,
                                 cv2.VideoWriter_fourcc(*'DIVX'), self.fps,
                                 self.frameSize).start()
        self.results = list()

    def pose_estimation(self, pose_model):
        """Feed every frame's detections through ``pose_model`` and the writer.

        Stops early when the detection processor returns no image.
        """
        batchSize = args.posebatch
        for i in range(self.data_loader.length()):
            # Gradients are not needed for inference; disabling them reduces
            # the amount of computation.
            with torch.no_grad():
                (inps, orig_img, im_name, boxes, scores, pt1,
                 pt2) = self.det_processor.read()
                if orig_img is None:
                    break
                if boxes is None or boxes.nelement() == 0:
                    # Nothing detected: forward the bare frame to the writer.
                    self.writer.save(None, None, None, None, None, orig_img,
                                     im_name.split('/')[-1])
                    continue

                # Pose Estimation, in chunks of at most `batchSize` crops
                datalen = inps.size(0)
                heatmaps = []
                for lo in range(0, datalen, batchSize):
                    heatmaps.append(
                        pose_model(inps[lo:lo + batchSize].cuda()))
                hm = torch.cat(heatmaps)
                hm = hm.cpu().data
                self.writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                                 im_name.split('/')[-1])

    def run(self):
        """Build the pose model, process the video and store the results."""
        import time

        args.mode = self.mode
        if args.fast_inference:
            pose_model = InferenNet_fast(4 * 1 + 1, self.pose_dataset)
        else:
            pose_model = InferenNet(4 * 1 + 1, self.pose_dataset)
        pose_model.cuda()
        pose_model.eval()

        # pose estimation
        print('Start Pose Estimating...')
        self.pose_estimation(pose_model)
        print('Finish Model Running.')

        if (args.save_img or args.save_video) and not args.vis_fast:
            print('Rendering remaining images in the queue...')
        # Poll with a short sleep instead of spinning on writer.running().
        while self.writer.running():
            time.sleep(0.01)
        self.writer.stop()
        self.results = self.writer.results().copy()

    def arm_pos(self):
        """Return arm joint positions for every person in ``self.results``.

        Returns:
            A list with one entry per person, each of the form
            ``[frame_number, left_x1y1, left_x2y2, right_x1y1, right_x2y2]``.
            ``frame_number`` is the integer before the first ``'.'`` of the
            frame's ``'imgname'``. The four points are integer ``(x, y)``
            tuples taken from keypoint rows 7, 9, 8 and 10, in that order.
        """
        arms = []
        for frame in self.results:
            jpg = frame['imgname'].split('.')
            # each body is the dictionary describing one person's skeleton
            for body in frame['result']:
                joint_list = body['keypoints'].tolist()
                arm = [int(jpg[0])]
                for joint in (7, 9, 8, 10):
                    arm.append((int(joint_list[joint][0]),
                                int(joint_list[joint][1])))
                arms.append(arm)
        return arms

    def arm_pos_json(self, json_path):
        """Return arm joint positions read from a result JSON file.

        Args:
            json_path: path of a UTF-8 JSON file holding a list of dicts
                with ``'image_id'`` and a flat ``'keypoints'`` list.

        Returns:
            A list with one entry per dict, each of the form
            ``[frame_number, left_x1y1, left_x2y2, right_x1y1, right_x2y2]``.
            The points are integer tuples read from flat offsets
            (21, 22), (27, 28), (24, 25) and (30, 31) of ``'keypoints'``.
        """
        with open(json_path, 'r', encoding='utf-8') as f:
            result = json.load(f)  # json_format
        arms = []
        l_index = [21, 27]
        r_index = [24, 30]
        # each body is the dictionary describing one person's skeleton
        for body in result:
            jpg = body['image_id'].split('.')
            arm = [int(jpg[0])]
            for i in l_index + r_index:
                arm.append(
                    (int(body['keypoints'][i]), int(body['keypoints'][i + 1])))
            arms.append(arm)
        return arms