# Data writer save_path = os.path.join( args.outputpath, 'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi') writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start() im_names_desc = tqdm(range(data_loader.length())) batchSize = args.posebatch for i in im_names_desc: start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read() if orig_img is None: break if boxes is None or boxes.nelement() == 0: writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1
def handle_video(video_file): # =========== common =============== args.video = video_file base_name = os.path.basename(args.video) video_name = base_name[:base_name.rfind('.')] # =========== end common =============== # =========== image =============== # img_path = f'outputs/alpha_pose_{video_name}/split_image/' # args.inputpath = img_path # args.outputpath = f'outputs/alpha_pose_{video_name}' # if os.path.exists(args.outputpath): # shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True) # else: # os.mkdir(args.outputpath) # # # if not len(video_file): # # raise IOError('Error: must contain --video') # # if len(img_path) and img_path != '/': # for root, dirs, files in os.walk(img_path): # im_names = sorted([f for f in files if 'png' in f or 'jpg' in f]) # else: # raise IOError('Error: must contain either --indir/--list') # # # Load input images # data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo').start() # print(f'Totally {data_loader.datalen} images') # =========== end image =============== # =========== video =============== dir_name = os.path.dirname(video_file) dir_name_split = dir_name[:dir_name.rfind('/')] new_dir_name = os.path.join(dir_name_split, 'outputvideo') #args.outputpath = f'outputs/alpha_pose_{video_name}' args.outputpath = new_dir_name + f'/alpha_pose_{video_name}' if os.path.exists(args.outputpath): shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True) else: os.mkdir(args.outputpath) videofile = args.video mode = args.mode if not len(videofile): raise IOError('Error: must contain --video') # Load input video data_loader = VideoLoader(videofile, batchSize=args.detbatch).start() (fourcc, fps, frameSize) = data_loader.videoinfo() print('the video is {} f/s'.format(fps)) # =========== end video =============== # Load detection loader print('Loading YOLO model..') sys.stdout.flush() det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start() # start a thread to read frames from the file video stream det_processor = DetectionProcessor(det_loader).start() # Load pose model pose_dataset = Mscoco() if args.fast_inference: pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset) else: pose_model = InferenNet(4 * 1 + 1, pose_dataset) pose_model.cuda() pose_model.eval() runtime_profile = {'dt': [], 'pt': [], 'pn': []} # Data writer save_path = os.path.join( args.outputpath, 'AlphaPose_' + ntpath.basename(video_file).split('.')[0] + '.avi') # writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start() writer = DataWriter(args.save_video).start() print('Start pose estimation...') im_names_desc = tqdm(range(data_loader.length())) batchSize = args.posebatch for i in im_names_desc: start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read() if orig_img is None: print(f'{i}-th image read None: handle_video') break if boxes is None or boxes.nelement() == 0: writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if datalen % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda() hm_j = pose_model(inps_j) hm.append(hm_j) hm = torch.cat(hm) ckpt_time, pose_time = getTime(ckpt_time) runtime_profile['pt'].append(pose_time) hm = hm.cpu().data writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1]) ckpt_time, post_time = getTime(ckpt_time) runtime_profile['pn'].append(post_time) if args.profile: # TQDM im_names_desc.set_description( 'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}' .format(dt=np.mean(runtime_profile['dt']), pt=np.mean(runtime_profile['pt']), pn=np.mean(runtime_profile['pn']))) if (args.save_img or args.save_video) and not args.vis_fast: print( '===========================> Rendering remaining images in the queue...' ) print( '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).' ) while writer.running(): pass writer.stop() final_result = writer.results() write_json(final_result, args.outputpath) return final_result, video_name
def main(file_name): # videofile = args.video videofile = file_name mode = args.mode if not os.path.exists(args.outputpath): os.mkdir(args.outputpath) if not len(videofile): raise IOError('Error: must contain --video') # Load input video data_loader = VideoLoader(videofile, batchSize=args.detbatch).start() (fourcc, fps, frameSize) = data_loader.videoinfo() # Load detection loader print('Loading YOLO model..') sys.stdout.flush() det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start() det_processor = DetectionProcessor(det_loader).start() # Load pose model pose_dataset = Mscoco() if args.fast_inference: pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset) else: pose_model = InferenNet(4 * 1 + 1, pose_dataset) pose_model.cuda() pose_model.eval() runtime_profile = {'dt': [], 'pt': [], 'pn': []} # Data writer save_path = os.path.join( args.outputpath, 'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi') writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start() im_names_desc = tqdm(range(data_loader.length())) batchSize = args.posebatch for i in im_names_desc: start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read() if orig_img is None: break if boxes is None or boxes.nelement() == 0: writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda() hm_j = pose_model(inps_j) hm.append(hm_j) hm = torch.cat(hm) ckpt_time, pose_time = getTime(ckpt_time) runtime_profile['pt'].append(pose_time) hm = hm.cpu().data import ipdb ipdb.set_trace() writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1]) ckpt_time, post_time = getTime(ckpt_time) runtime_profile['pn'].append(post_time) if args.profile: # TQDM im_names_desc.set_description( 'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}' .format(dt=np.mean(runtime_profile['dt']), pt=np.mean(runtime_profile['pt']), pn=np.mean(runtime_profile['pn']))) print('===========================> Finish Model Running.') if (args.save_img or args.save_video) and not args.vis_fast: print( '===========================> Rendering remaining images in the queue...' ) print( '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).' ) while (writer.running()): pass writer.stop() final_result = writer.results() write_json(final_result, args.outputpath)
def handle_video(videofile, no_nan=True): args.video = videofile videofile = args.video mode = args.mode if not len(videofile): raise IOError('Error: must contain --video') # Load input video data_loader = VideoLoader(videofile, batchSize=args.detbatch).start() (fourcc, fps, frameSize) = data_loader.videoinfo() cam_w = frameSize[0] cam_h = frameSize[1] print('the video is {} f/s'.format(fps)) # Load detection loader print('Loading YOLO model..') sys.stdout.flush() det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start() # start a thread to read frames from the file video stream det_processor = DetectionProcessor(det_loader).start() # Load pose model pose_dataset = Mscoco() if args.fast_inference: pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset) else: pose_model = InferenNet(4 * 1 + 1, pose_dataset) pose_model.cuda() pose_model.eval() runtime_profile = {'dt': [], 'pt': [], 'pn': []} # Data writer save_path = os.path.join( args.outputpath, 'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi') writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start() im_names_desc = tqdm(range(data_loader.length())) batchSize = args.posebatch frames_w_pose = [] frame_cnt = 0 for i in im_names_desc: start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read() if orig_img is None: break frame_cnt += 1 if boxes is None or boxes.nelement() == 0: writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue frames_w_pose.append(frame_cnt - 1) ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda() hm_j = pose_model(inps_j) hm.append(hm_j) hm = torch.cat(hm) ckpt_time, pose_time = getTime(ckpt_time) runtime_profile['pt'].append(pose_time) hm = hm.cpu().data writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1]) ckpt_time, post_time = getTime(ckpt_time) runtime_profile['pn'].append(post_time) if args.profile: # TQDM im_names_desc.set_description( 'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}' .format(dt=np.mean(runtime_profile['dt']), pt=np.mean(runtime_profile['pt']), pn=np.mean(runtime_profile['pn']))) if (args.save_img or args.save_video) and not args.vis_fast: print( '===========================> Rendering remaining images in the queue...' ) print( '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).' ) while (writer.running()): pass writer.stop() final_result = writer.results() kpts = [] if not no_nan: for i in range(frame_cnt): # initialize to NaN so we can interpolate later kpts.append(np.full((17, 2), np.nan, dtype=np.float32)) for i in range(len(final_result)): try: kpt = final_result[i]['result'][0]['keypoints'] if not no_nan: kpts[frames_w_pose[i]] = kpt.data.numpy() else: kpts.append(kpt.data.numpy()) except: print('error...') kpts = np.array(kpts).astype(np.float32) #filename = os.path.basename(args.video).split('.')[0] #name = filename + '.npz' #print('kpts npz save in ', name) #np.savez_compressed(name, kpts=kpts, fps=fps, cam_w=cam_w, cam_h=cam_h) return kpts, fps, cam_w, cam_h
'dt': [], 'pt': [], 'pn': [] } # Init data writer writer = DataWriter(args.save_video).start() data_len = data_loader.length() im_names_desc = tqdm(range(data_len)) batchSize = args.posebatch for i in im_names_desc: start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read() if boxes is None or boxes.nelement() == 0: writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches):
def Alphapose( im_names, pose_model, ): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Load input images data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo').start() # Load detection loader sys.stdout.flush() det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start() det_processor = DetectionProcessor(det_loader).start() runtime_profile = {'dt': [], 'pt': [], 'pn': []} # Init data writer writer = DataWriter(args.save_video).start() data_len = data_loader.length() im_names_desc = tqdm(range(data_len)) batchSize = args.posebatch for i in im_names_desc: start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read() if boxes is None or boxes.nelement() == 0: writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].to(device) hm_j = pose_model(inps_j) hm.append(hm_j) hm = torch.cat(hm) ckpt_time, pose_time = getTime(ckpt_time) runtime_profile['pt'].append(pose_time) hm = hm.cpu() writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1]) ckpt_time, post_time = getTime(ckpt_time) runtime_profile['pn'].append(post_time) if args.profile: # TQDM im_names_desc.set_description( 'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}' .format(dt=np.mean(runtime_profile['dt']), pt=np.mean(runtime_profile['pt']), pn=np.mean(runtime_profile['pn']))) print('Finish Model Running.') if (args.save_img or args.save_video) and not args.vis_fast: print( '===========================> Rendering remaining images in the queue...' ) print( '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).' ) while (writer.running()): pass writer.stop() final_result = writer.results() # write_json(final_result, args.outputpath) if final_result[0]['result']: return final_result[0]['result'][0]['keypoints'] else: return None
def main(args): inputpath = args.inputpath inputlist = args.inputlist mode = args.mode if not os.path.exists(args.outputpath): os.mkdir(args.outputpath) if len(inputlist): im_names = open(inputlist, 'r').readlines() elif len(inputpath) and inputpath != '/': for root, dirs, files in os.walk(inputpath): im_names = files else: raise IOError('Error: must contain either --indir/--list') # Load input images data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo').start() # Load detection loader print('Loading YOLO model..') sys.stdout.flush() det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start() det_processor = DetectionProcessor(det_loader).start() # Load pose model pose_dataset = Mscoco() if args.fast_inference: pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset) else: pose_model = InferenNet(4 * 1 + 1, pose_dataset) pose_model.cuda() pose_model.eval() runtime_profile = {'dt': [], 'pt': [], 'pn': []} # Init data writer writer = DataWriter(args.save_video).start() data_len = data_loader.length() im_names_desc = tqdm(range(data_len)) batchSize = args.posebatch for i in im_names_desc: start_time = getTime() with torch.no_grad(): (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read() if boxes is None or boxes.nelement() == 0: writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue ckpt_time, det_time = getTime(start_time) runtime_profile['dt'].append(det_time) # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda() hm_j = pose_model(inps_j) hm.append(hm_j) hm = torch.cat(hm) ckpt_time, pose_time = getTime(ckpt_time) runtime_profile['pt'].append(pose_time) hm = hm.cpu() writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1]) ckpt_time, post_time = getTime(ckpt_time) runtime_profile['pn'].append(post_time) if args.profile: # TQDM im_names_desc.set_description( 'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}' .format(dt=np.mean(runtime_profile['dt']), pt=np.mean(runtime_profile['pt']), pn=np.mean(runtime_profile['pn']))) print('===========================> Finish Model Running.') if (args.save_img or args.save_video) and not args.vis_fast: print( '===========================> Rendering remaining images in the queue...' ) print( '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).' ) while (writer.running()): pass writer.stop() final_result = writer.results() write_json(final_result, args.outputpath)
class AlphaPose(): def __init__(self, videofile, mode='normal'): self.videofile = videofile self.mode = mode self.data_loader = VideoLoader(self.videofile, batchSize=args.detbatch).start() (fourcc, fps, frameSize) = self.data_loader.videoinfo() self.fourcc = fourcc self.fps = fps self.frameSize = frameSize self.det_loader = DetectionLoader(self.data_loader, batchSize=args.detbatch).start() self.det_processor = DetectionProcessor(self.det_loader).start() self.pose_dataset = Mscoco() save_path = os.path.join( args.outputpath, 'AlphaPose_' + ntpath.basename(self.videofile).split('.')[0] + '.mp4') self.writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'DIVX'), self.fps, self.frameSize).start() self.results = list() def pose_estimation(self, pose_model): batchSize = args.posebatch for i in range(self.data_loader.length()): with torch.no_grad( ): #不計算導數以此減少運算量 可用在model evaluating時 將inference的code放在其中 (inps, orig_img, im_name, boxes, scores, pt1, pt2) = self.det_processor.read() if orig_img is None: break if boxes is None or boxes.nelement() == 0: self.writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1]) continue # Pose Estimation datalen = inps.size(0) leftover = 0 if (datalen) % batchSize: leftover = 1 num_batches = datalen // batchSize + leftover hm = [] for j in range(num_batches): inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda() hm_j = pose_model(inps_j) hm.append(hm_j) hm = torch.cat(hm) hm = hm.cpu().data self.writer.save(boxes, scores, hm, pt1, pt2, orig_img, im_name.split('/')[-1]) def run(self): args.mode = self.mode if args.fast_inference: pose_model = InferenNet_fast(4 * 1 + 1, self.pose_dataset) else: pose_model = InferenNet(4 * 1 + 1, self.pose_dataset) pose_model.cuda() pose_model.eval() #pose estimation print('Start Pose Estimating...') self.pose_estimation(pose_model) print('Finish Model Running.') if (args.save_img or args.save_video) and not args.vis_fast: print('Rendering remaining images in the queue...') while (self.writer.running()): pass self.writer.stop() self.results = self.writer.results().copy() def arm_pos(self): arms = [] for frame in self.results: #body是每個人的骨架的dictionary jpg = frame['imgname'].split('.') for body in frame['result']: arm = [ ] #[int, tuple, tuple, tuple, tuple] -> [frame_number, left_x1y1, left_x2y2, right_x1y1, right_x2y2] arm.append(int(jpg[0])) joint_list = body['keypoints'].tolist() arm.append((int(joint_list[7][0]), int(joint_list[7][1]))) arm.append((int(joint_list[9][0]), int(joint_list[9][1]))) arm.append((int(joint_list[8][0]), int(joint_list[8][1]))) arm.append((int(joint_list[10][0]), int(joint_list[10][1]))) arms.append(arm) return arms def arm_pos_json(self, json_path): with open(json_path, 'r', encoding='utf-8') as f: result = json.load(f) #json_format arms = [] l_index = [21, 27] r_index = [24, 30] for body in result: #body是每個人的骨架的dictionary arm = [ ] #[int, tuple, tuple, tuple, tuple] -> [frame_number, left_x1y1, left_x2y2, right_x1y1, right_x2y2] jpg = body['image_id'].split('.') arm.append(int(jpg[0])) for i in l_index: arm.append( (int(body['keypoints'][i]), int(body['keypoints'][i + 1]))) for i in r_index: arm.append( (int(body['keypoints'][i]), int(body['keypoints'][i + 1]))) arms.append(arm) return arms