def generate_kpts(video_file):
    final_result, video_name = handle_video(video_file)

    # ============ Changing ++++++++++
    kpts = []
    no_person = []
    for i in range(len(final_result)):
        if not final_result[i]['result']:  # No people detected in this frame
            no_person.append(i)
            kpts.append(None)
            continue

        # Keep the most salient person: highest proposal score, weighted by
        # the image area its keypoints span.
        kpt = max(final_result[i]['result'],
                  key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']),
                  )['keypoints']
        kpts.append(kpt.data.numpy())

    # Frames without a person are filled with the last frame's keypoints.
    for n in no_person:
        kpts[n] = kpts[-1]
    no_person.clear()
    # ============ Changing End ++++++++++

    print(args.outputpath)
    name = '{0}/{1}.npz'.format(args.outputpath, video_name)
    kpts = np.array(kpts).astype(np.float32)
    print('kpts npz save in ', name)
    np.savez_compressed(name, kpts=kpts)

    return kpts
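# `calculate_area` is used throughout as a saliency weight but is not defined in
# this file. A minimal sketch of what it plausibly computes, assuming `keypoints`
# is an (n_joints, 2) tensor/array of (x, y) pixel coordinates; the helper name
# is hypothetical and the real implementation may differ:
def _calculate_area_sketch(keypoints):
    kpts = np.asarray(keypoints)
    width = kpts[:, 0].max() - kpts[:, 0].min()
    height = kpts[:, 1].max() - kpts[:, 1].min()
    return float(width * height)  # bounding-box area of the skeleton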
def remove_irrelevant(no_track_result, save_percent=0.5):
    """
    Prune crowded frames, keeping only the top-scoring fraction of the people
    detected in each image.

    :param no_track_result: AlphaPose result list (json dicts) before pruning
    :param save_percent: fraction of detections to keep per image, in (0, 1]
    :return: pruned result list
    """
    id_map = defaultdict(list)
    for result in no_track_result:
        id_map[result['image_id']].append(result)

    relevant_result = []
    for values in id_map.values():
        num = len(values)
        if num > 0:
            # Rank people by detection score weighted by keypoint area.
            values.sort(key=lambda m: m['score'] * calculate_area(m['keypoints']),
                        reverse=True)
            relevant_result.extend(values[:int(num * save_percent)])
    return relevant_result
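# Usage sketch for remove_irrelevant. The data below is fabricated for
# illustration (random keypoints, made-up scores) and assumes calculate_area
# accepts plain (17, 2) arrays:
def _demo_remove_irrelevant():
    rng = np.random.default_rng(0)
    demo = [{'image_id': 'frame_0.jpg',
             'score': s,
             'keypoints': rng.uniform(0, 100, size=(17, 2))}
            for s in (0.9, 0.6, 0.2, 0.1)]
    kept = remove_irrelevant(demo, save_percent=0.5)
    assert len(kept) == 2  # half of the four detections survive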
def update(self):
    time1 = time.time()
    _, frame = self.stream.read()
    # frame = cv2.resize(frame, (frame.shape[1]//2, frame.shape[0]//2))  # TODO TESTING
    # frame[:, :200, :] = 0
    # frame[:, 450:, :] = 0
    img_k, self.orig_img, im_dim_list_k = prep_frame(frame, self.inp_dim)

    img = [img_k]
    im_name = ["im_name"]
    im_dim_list = [im_dim_list_k]

    img = torch.cat(img)
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    time2 = time.time()
    with torch.no_grad():
        ### detector #########################
        # Human detection
        img = img.cuda()
        prediction = self.det_model(img, CUDA=True)
        # NMS
        dets = dynamic_write_results(prediction, opt.confidence,
                                     opt.num_classes, nms=True,
                                     nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            self.visualize2dnoperson()
            return None

        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

        # Map box coordinates from the letterboxed detector input back to the
        # original image, then clamp to the image bounds.
        dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        dets[:, 1:5] /= scaling_factor
        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])

        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]
        boxes_k = boxes[dets[:, 0] == 0]  # single-image batch: boxes_k == boxes
        if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
            self.visualize2dnoperson()
            return None

        inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes_k.size(0), 2)
        pt2 = torch.zeros(boxes_k.size(0), 2)

        time3 = time.time()
        ### processor #########################
        inp = im_to_torch(cv2.cvtColor(self.orig_img, cv2.COLOR_BGR2RGB))
        inps, pt1, pt2 = self.crop_from_dets(inp, boxes, inps, pt1, pt2)

        ### generator #########################
        self.orig_img = np.array(self.orig_img, dtype=np.uint8)

        # location prediction (n, kp, 2) | score prediction (n, kp, 1)
        datalen = inps.size(0)
        batchSize = 20  # args.posebatch
        leftover = 1 if datalen % batchSize else 0
        num_batches = datalen // batchSize + leftover
        hm = []

        time4 = time.time()
        for j in range(num_batches):
            inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
            hm_j = self.pose_model(inps_j)
            hm.append(hm_j)
        hm = torch.cat(hm)
        hm = hm.cpu().data

        preds_hm, preds_img, preds_scores = getPrediction(
            hm, pt1, pt2, opt.inputResH, opt.inputResW,
            opt.outputResH, opt.outputResW)
        result = pose_nms(boxes, scores, preds_img, preds_scores)

        time5 = time.time()
        if not result:  # No people
            self.visualize2dnoperson()
            return None

        # Keep the most salient person for single-person downstream use.
        self.kpt = max(result,
                       key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']),
                       )['keypoints']
        self.visualize2d()
        return self.kpt
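# The chunked pose-inference loop above reappears verbatim in handle_video and
# in the queued update() below. A minimal reusable sketch (assumes a CUDA
# device, as the surrounding code does; the helper name is hypothetical):
def _batched_heatmaps(pose_model, inps, batch_size=20):
    """Run pose_model over `inps` in chunks and return stacked CPU heatmaps."""
    hm = []
    with torch.no_grad():
        for j in range(0, inps.size(0), batch_size):
            hm.append(pose_model(inps[j:j + batch_size].cuda()))
    return torch.cat(hm).cpu().data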
def handle_video(video_file):
    # =========== common ===============
    args.video = video_file
    base_name = os.path.basename(args.video)
    video_name = base_name[:base_name.rfind('.')]
    # =========== end common ===============

    img_path = f'outputs/alpha_pose_{video_name}/split_image/'

    # =========== image ===============
    args.inputpath = img_path
    args.outputpath = f'outputs/alpha_pose_{video_name}'
    if os.path.exists(args.outputpath):
        shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    else:
        os.makedirs(args.outputpath)  # makedirs: 'outputs/' may not exist yet

    if len(img_path) and img_path != '/':
        for root, dirs, files in os.walk(img_path):
            im_names = sorted([f for f in files if 'png' in f or 'jpg' in f])
    else:
        raise IOError('Error: must contain either --indir/--list')

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo').start()
    print(f'Totally {data_loader.datalen} images')
    # =========== end image ===============

    # =========== video ===============
    # VideoLoader-based variant, kept for reference:
    # data_loader = VideoLoader(args.video, batchSize=args.detbatch).start()
    # (fourcc, fps, frameSize) = data_loader.videoinfo()
    # print('the video is {} f/s'.format(fps))
    # =========== end video ===============

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    # start a thread to read frames from the file video stream
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(video_file).split('.')[0] + '.avi')
    # writer = DataWriter(args.save_video, save_path,
    #                     cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()
    writer = DataWriter(args.save_video).start()

    print('Start pose estimation...')
    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                print(f'{i}-th image read None: handle_video')
                break
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose estimation, in chunks of at most batchSize crops
            datalen = inps.size(0)
            leftover = 1 if datalen % batchSize else 0
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'.format(
                    dt=np.mean(runtime_profile['dt']),
                    pt=np.mean(runtime_profile['pt']),
                    pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print('===========================> Rendering remaining images in the queue...')
        print('===========================> If this step takes too long, you can enable '
              'the --vis_fast flag to use fast rendering (real-time).')
    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()

    write_json(final_result, args.outputpath)

    # Keypoint extraction and the .npz save are handled by generate_kpts,
    # which also guards against frames where no person was detected.
    return final_result, video_name
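# generate_kpts (above) fills person-less frames with the keypoints of the very
# last frame, which can tear motion apart mid-clip. A gentler alternative is to
# forward-fill from the nearest preceding valid frame; a sketch (not the repo's
# method, helper name hypothetical):
def _fill_missing_kpts(kpts):
    """Forward-fill None frames from the last valid one; back-fill leading gaps."""
    last = None
    for i, k in enumerate(kpts):
        if k is None:
            kpts[i] = last
        else:
            last = k
    first = next((k for k in kpts if k is not None), None)
    return [first if k is None else k for k in kpts]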
def update(self):
    # Keep looping infinitely, draining the detection queue
    while True:
        sys.stdout.flush()
        print("generator len : " + str(self.Q.qsize()))

        # if self.stopped:  # stop-flag handling (disabled)
        #     cv2.destroyAllWindows()
        #     if self.save_video:
        #         self.stream.release()
        #     return

        # otherwise, ensure the queue is not empty
        if not self.det_processor.Q.empty():
            with torch.no_grad():
                (inps, orig_img, im_name, boxes, scores, pt1, pt2) = self.det_processor.read()
                if orig_img is None:
                    sys.stdout.flush()
                    print(f'{im_name} image read None: handle_video')
                    break
                orig_img = np.array(orig_img, dtype=np.uint8)

                if boxes is None or boxes.nelement() == 0:
                    # No detection: publish the sentinel result
                    res = {'keypoints': -1, 'image': orig_img}
                    self.Q.put(res)  # TODO
                else:
                    # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                    datalen = inps.size(0)
                    batchSize = 20  # args.posebatch
                    leftover = 1 if datalen % batchSize else 0
                    num_batches = datalen // batchSize + leftover
                    hm = []
                    for j in range(num_batches):
                        inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                        hm_j = self.pose_model(inps_j)
                        hm.append(hm_j)
                    hm = torch.cat(hm)
                    hm_data = hm.cpu().data
                    im_name = im_name.split('/')[-1]

                    if opt.matching:
                        preds = getMultiPeakPrediction(
                            hm_data, pt1.numpy(), pt2.numpy(), opt.inputResH,
                            opt.inputResW, opt.outputResH, opt.outputResW)
                        result = matching(boxes, scores.numpy(), preds)
                    else:
                        preds_hm, preds_img, preds_scores = getPrediction(
                            hm_data, pt1, pt2, opt.inputResH, opt.inputResW,
                            opt.outputResH, opt.outputResW)
                        result = pose_nms(boxes, scores, preds_img, preds_scores)

                    result = {'imgname': im_name, 'result': result}
                    self.final_result.append(result)

                    # Optional visualization hook:
                    # img = vis_frame(orig_img, result)
                    # cv2.imshow("AlphaPose Demo", img)
                    # cv2.waitKey(30)

                    if not result['result']:  # No people
                        res = {'keypoints': -1, 'image': orig_img}
                        self.Q.put(res)  # TODO
                    else:
                        # Keep the most salient person
                        kpt = max(result['result'],
                                  key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']),
                                  )['keypoints']
                        res = {'keypoints': kpt, 'image': orig_img}
                        self.Q.put(res)
        else:
            time.sleep(0.1)
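# Consumer-side sketch of the queue protocol used above: 'keypoints' is the -1
# sentinel for person-less frames, otherwise a (n_joints, 2) tensor. Assumes
# self.Q is a standard queue with a blocking get(); `handle_frame` is a
# caller-supplied callback, named here only for illustration:
def _consume_poses(generator, handle_frame):
    while True:
        res = generator.Q.get()
        if isinstance(res['keypoints'], int):  # -1 sentinel: no person detected
            continue
        handle_frame(res['image'], res['keypoints'])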