def main(): """Visualize the demo images. Using mmdet to detect the human. """ parser = ArgumentParser() parser.add_argument('tracking_config', help='Config file for tracking') parser.add_argument('pose_config', help='Config file for pose') parser.add_argument('pose_checkpoint', help='Checkpoint file for pose') parser.add_argument('--video-path', type=str, help='Video path') parser.add_argument('--show', action='store_true', default=False, help='whether to show visualizations.') parser.add_argument('--out-video-root', default='', help='Root of the output video file. ' 'Default not saving the visualization video.') parser.add_argument('--device', default='cuda:0', help='Device used for inference') parser.add_argument('--bbox-thr', type=float, default=0.3, help='Bounding box score threshold') parser.add_argument('--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') parser.add_argument('--radius', type=int, default=4, help='Keypoint radius for visualization') parser.add_argument('--thickness', type=int, default=1, help='Link thickness for visualization') assert has_mmtrack, 'Please install mmtrack to run the demo.' args = parser.parse_args() assert args.show or (args.out_video_root != '') assert args.tracking_config is not None tracking_model = init_tracking_model(args.tracking_config, None, device=args.device.lower()) # build the pose model from a config file and a checkpoint file pose_model = init_pose_model(args.pose_config, args.pose_checkpoint, device=args.device.lower()) dataset = pose_model.cfg.data['test']['type'] dataset_info = pose_model.cfg.data['test'].get('dataset_info', None) if dataset_info is None: warnings.warn( 'Please set `dataset_info` in the config.' 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.', DeprecationWarning) else: dataset_info = DatasetInfo(dataset_info) cap = cv2.VideoCapture(args.video_path) assert cap.isOpened(), f'Faild to load video file {args.video_path}' if args.out_video_root == '': save_out_video = False else: os.makedirs(args.out_video_root, exist_ok=True) save_out_video = True if save_out_video: fps = cap.get(cv2.CAP_PROP_FPS) size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) fourcc = cv2.VideoWriter_fourcc(*'mp4v') videoWriter = cv2.VideoWriter( os.path.join(args.out_video_root, f'vis_{os.path.basename(args.video_path)}'), fourcc, fps, size) # optional return_heatmap = False # e.g. use ('backbone', ) to return backbone feature output_layer_names = None frame_id = 0 while (cap.isOpened()): flag, img = cap.read() if not flag: break mmtracking_results = inference_mot(tracking_model, img, frame_id=frame_id) # keep the person class bounding boxes. person_results = process_mmtracking_results(mmtracking_results) # test a single image, with a list of bboxes. pose_results, returned_outputs = inference_top_down_pose_model( pose_model, img, person_results, bbox_thr=args.bbox_thr, format='xyxy', dataset=dataset, dataset_info=dataset_info, return_heatmap=return_heatmap, outputs=output_layer_names) # show the results vis_img = vis_pose_tracking_result(pose_model, img, pose_results, radius=args.radius, thickness=args.thickness, dataset=dataset, dataset_info=dataset_info, kpt_score_thr=args.kpt_thr, show=False) if args.show: cv2.imshow('Image', vis_img) if save_out_video: videoWriter.write(vis_img) if args.show and cv2.waitKey(1) & 0xFF == ord('q'): break frame_id += 1 cap.release() if save_out_video: videoWriter.release() if args.show: cv2.destroyAllWindows()
def main():
    parser = ArgumentParser()
    parser.add_argument('config', help='config file')
    parser.add_argument('--input', help='input video file or folder')
    parser.add_argument(
        '--output', help='output video file (mp4 format) or folder')
    parser.add_argument('--checkpoint', help='checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='device used for inference')
    parser.add_argument(
        '--show',
        action='store_true',
        help='whether to show the results on the fly')
    parser.add_argument(
        '--backend',
        choices=['cv2', 'plt'],
        default='cv2',
        help='the backend to visualize the results')
    parser.add_argument('--fps', type=int, help='FPS of the output video')
    args = parser.parse_args()
    assert args.output or args.show

    # load images
    if osp.isdir(args.input):
        imgs = sorted(os.listdir(args.input))
        IN_VIDEO = False
    else:
        imgs = mmcv.VideoReader(args.input)
        IN_VIDEO = True

    # define output
    # initialize OUT_VIDEO up front so that `--show` without `--output`
    # does not hit an undefined name below
    OUT_VIDEO = False
    if args.output is not None:
        if args.output.endswith('.mp4'):
            OUT_VIDEO = True
            out_dir = tempfile.TemporaryDirectory()
            out_path = out_dir.name
            _out = args.output.rsplit('/', 1)
            if len(_out) > 1:
                os.makedirs(_out[0], exist_ok=True)
        else:
            out_path = args.output
            os.makedirs(out_path, exist_ok=True)

    fps = args.fps
    if args.show or OUT_VIDEO:
        if fps is None and IN_VIDEO:
            fps = imgs.fps
        if not fps:
            raise ValueError('Please set the FPS for the output video.')
        fps = int(fps)

    # build the model from a config file and a checkpoint file
    model = init_model(args.config, args.checkpoint, device=args.device)

    prog_bar = mmcv.ProgressBar(len(imgs))
    # test and show/save the images
    for i, img in enumerate(imgs):
        if isinstance(img, str):
            img = osp.join(args.input, img)
        result = inference_mot(model, img, frame_id=i)
        if args.output is not None:
            if IN_VIDEO or OUT_VIDEO:
                out_file = osp.join(out_path, f'{i:06d}.jpg')
            else:
                out_file = osp.join(out_path, img.rsplit('/', 1)[-1])
        else:
            out_file = None
        model.show_result(
            img,
            result,
            show=args.show,
            wait_time=int(1000. / fps) if fps else 0,
            out_file=out_file,
            backend=args.backend)
        prog_bar.update()

    if OUT_VIDEO:
        print(f'making the output video at {args.output} with a FPS of {fps}')
        mmcv.frames2video(out_path, args.output, fps=fps)
        out_dir.cleanup()
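

# For reference, `inference_mot` returns a dict of per-class results: the
# tracking entry ('track_bboxes' in newer mmtrack, 'track_results' in older
# releases) is a list with one array per class, where each row is
# [track_id, x1, y1, x2, y2, score]. A hedged sketch of consuming those
# results programmatically; the config path and video name are placeholders:
import mmcv
from mmtrack.apis import inference_mot, init_model


def iterate_tracks_sketch():
    # hypothetical config/checkpoint and input video, for illustration only
    model = init_model('configs/mot/my_tracker.py', None, device='cuda:0')
    video = mmcv.VideoReader('demo.mp4')
    for frame_id, frame in enumerate(video):
        result = inference_mot(model, frame, frame_id=frame_id)
        # fall back to the older key if the newer one is absent
        tracks = result.get('track_bboxes', result.get('track_results'))
        for class_id, dets in enumerate(tracks):
            for track_id, x1, y1, x2, y2, score in dets:
                print(frame_id, class_id, int(track_id), float(score))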
def main():
    parser = ArgumentParser()
    parser.add_argument('config', help='config file')
    parser.add_argument('--input', help='input video file or folder')
    parser.add_argument(
        '--output', help='output video file (mp4 format) or folder')
    parser.add_argument('--checkpoint', help='checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='device used for inference')
    parser.add_argument(
        '--show',
        action='store_true',
        help='whether to show the results on the fly')
    parser.add_argument(
        '--backend',
        choices=['cv2', 'plt'],
        default='cv2',
        help='the backend to visualize the results')
    parser.add_argument('--fps', type=int, help='FPS of the output video')
    args = parser.parse_args()
    assert args.output or args.show

    # load images
    if osp.isdir(args.input):
        imgs = sorted(os.listdir(args.input))
        IN_VIDEO = False
    else:
        imgs = mmcv.VideoReader(args.input)
        IN_VIDEO = True

    # define output
    # initialize OUT_VIDEO up front so `--show` without `--output` works
    OUT_VIDEO = False
    if args.output is not None:
        if args.output.endswith('.mp4'):
            OUT_VIDEO = True
            out_dir = tempfile.TemporaryDirectory()
            out_path = out_dir.name
            _out = args.output.rsplit('/', 1)
            if len(_out) > 1:
                os.makedirs(_out[0], exist_ok=True)
        else:
            out_path = args.output
            os.makedirs(out_path, exist_ok=True)

    fps = args.fps
    if args.show or OUT_VIDEO:
        if fps is None and IN_VIDEO:
            fps = imgs.fps
        if not fps:
            raise ValueError('Please set the FPS for the output video.')
        fps = int(fps)

    # build the model from a config file and a checkpoint file
    model = init_model(args.config, args.checkpoint, device=args.device)

    # test the videos and save one crop per track id plus the full track
    raw_video_path = "/data/taofuyu/tao_dataset/car_reid/high_video/f73dc1dc-a5b3040f/"
    all_video = []
    for root, _, files in os.walk(raw_video_path):
        for ff in files:
            if ff.split(".")[-1] == "mp4":
                all_video.append(os.path.join(root, ff))

    for video in all_video:
        appeared_id = []
        imgs = mmcv.VideoReader(video)
        print(video)
        print(
            os.path.join("/data/taofuyu/tao_dataset/car_reid/high/",
                         video.split("/")[-1].split(".")[0]))
        prog_bar = mmcv.ProgressBar(len(imgs))
        for i, img in enumerate(imgs):
            if isinstance(img, str):
                img = osp.join(video, img)
            result = inference_mot(model, img, frame_id=i)
            result = result['track_results']
            car_result = result[6]  # class 6: roof
            for car in car_result:
                video_path = video
                sample_save_path = os.path.join(
                    "/data/taofuyu/tao_dataset/car_reid/roof/",
                    video_path.split("/")[-1].split(".")[0])
                if not os.path.exists(sample_save_path):
                    os.makedirs(sample_save_path)

                car_id = str(int(car[0]))
                car_box = car[1:-1]
                x_min, y_min, x_max, y_max = map(int, list(car_box))
                car_patch = img[y_min:y_max, x_min:x_max]
                h, w, _ = car_patch.shape
                if h <= 0 or w <= 0:
                    continue
                # save one representative crop per newly appeared track id
                if car_id not in appeared_id:
                    cv2.imwrite(
                        os.path.join(sample_save_path,
                                     "id_" + car_id + ".jpg"), car_patch)
                    appeared_id.append(car_id)
                # also save every frame of the track into its own folder
                track_save_path = os.path.join(sample_save_path,
                                               "id_" + car_id)
                if not os.path.exists(track_save_path):
                    os.makedirs(track_save_path)
                cv2.imwrite(
                    track_save_path + "/" + str(i) + "_" + car_id + ".jpg",
                    car_patch)

            # if args.output is not None:
            #     if IN_VIDEO or OUT_VIDEO:
            #         out_file = osp.join(out_path, f'{i:06d}.jpg')
            #     else:
            #         out_file = osp.join(out_path, img.rsplit('/', 1)[-1])
            # else:
            #     out_file = None
            # model.show_result(
            #     img,
            #     result,
            #     show=args.show,
            #     wait_time=int(1000. / fps) if fps else 0,
            #     out_file=out_file,
            #     backend=args.backend)
            prog_bar.update()
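

# One caveat in the cropping loop above: tracked boxes can extend past the
# frame border, and negative slice indices wrap around in numpy, so the
# shape check alone can let corrupt patches through. A small helper (not
# part of the original script) that clamps the box to the image bounds
# before slicing:
def crop_patch(img, box):
    """Crop (x_min, y_min, x_max, y_max) from img; None if degenerate."""
    img_h, img_w = img.shape[:2]
    x_min = min(max(int(box[0]), 0), img_w)
    y_min = min(max(int(box[1]), 0), img_h)
    x_max = min(max(int(box[2]), 0), img_w)
    y_max = min(max(int(box[3]), 0), img_h)
    if x_max <= x_min or y_max <= y_min:
        return None
    return img[y_min:y_max, x_min:x_max]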
def main(): """Visualize the demo images. Using mmdet to detect the human. """ parser = ArgumentParser() parser.add_argument('tracking_config', help='Config file for tracking') parser.add_argument('pose_config', help='Config file for pose') parser.add_argument('pose_checkpoint', help='Checkpoint file for pose') parser.add_argument('--video-path', type=str, help='Video path') parser.add_argument('--show', action='store_true', default=False, help='whether to show visualizations.') parser.add_argument('--out-video-root', default='', help='Root of the output video file. ' 'Default not saving the visualization video.') parser.add_argument('--device', default='cuda:0', help='Device used for inference') parser.add_argument('--bbox-thr', type=float, default=0.3, help='Bounding box score threshold') parser.add_argument('--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') parser.add_argument('--radius', type=int, default=4, help='Keypoint radius for visualization') parser.add_argument('--thickness', type=int, default=1, help='Link thickness for visualization') parser.add_argument( '--smooth', action='store_true', help='Apply a temporal filter to smooth the pose estimation results. ' 'See also --smooth-filter-cfg.') parser.add_argument( '--smooth-filter-cfg', type=str, default='configs/_base_/filters/one_euro.py', help='Config file of the filter to smooth the pose estimation ' 'results. See also --smooth.') parser.add_argument( '--use-multi-frames', action='store_true', default=False, help='whether to use multi frames for inference in the pose' 'estimation stage. Default: False.') parser.add_argument( '--online', action='store_true', default=False, help='inference mode. If set to True, can not use future frame' 'information when using multi frames for inference in the pose' 'estimation stage. Default: False.') assert has_mmtrack, 'Please install mmtrack to run the demo.' args = parser.parse_args() assert args.show or (args.out_video_root != '') assert args.tracking_config is not None print('Initializing model...') tracking_model = init_tracking_model(args.tracking_config, None, device=args.device.lower()) # build the pose model from a config file and a checkpoint file pose_model = init_pose_model(args.pose_config, args.pose_checkpoint, device=args.device.lower()) dataset = pose_model.cfg.data['test']['type'] dataset_info = pose_model.cfg.data['test'].get('dataset_info', None) if dataset_info is None: warnings.warn( 'Please set `dataset_info` in the config.' 
'Check https://github.com/open-mmlab/mmpose/pull/663 for details.', DeprecationWarning) else: dataset_info = DatasetInfo(dataset_info) # read video video = mmcv.VideoReader(args.video_path) assert video.opened, f'Faild to load video file {args.video_path}' if args.out_video_root == '': save_out_video = False else: os.makedirs(args.out_video_root, exist_ok=True) save_out_video = True if save_out_video: fps = video.fps size = (video.width, video.height) fourcc = cv2.VideoWriter_fourcc(*'mp4v') videoWriter = cv2.VideoWriter( os.path.join(args.out_video_root, f'vis_{os.path.basename(args.video_path)}'), fourcc, fps, size) # frame index offsets for inference, used in multi-frame inference setting if args.use_multi_frames: assert 'frame_indices_test' in pose_model.cfg.data.test.data_cfg indices = pose_model.cfg.data.test.data_cfg['frame_indices_test'] # build pose smoother for temporal refinement if args.smooth: smoother = Smoother(filter_cfg=args.smooth_filter_cfg, keypoint_dim=2) else: smoother = None # whether to return heatmap, optional return_heatmap = False # return the output of some desired layers, # e.g. use ('backbone', ) to return backbone feature output_layer_names = None print('Running inference...') for frame_id, cur_frame in enumerate(mmcv.track_iter_progress(video)): if args.use_multi_frames: frames = collect_multi_frames(video, frame_id, indices, args.online) mmtracking_results = inference_mot(tracking_model, cur_frame, frame_id=frame_id) # keep the person class bounding boxes. person_results = process_mmtracking_results(mmtracking_results) # test a single image, with a list of bboxes. pose_results, returned_outputs = inference_top_down_pose_model( pose_model, frames if args.use_multi_frames else cur_frame, person_results, bbox_thr=args.bbox_thr, format='xyxy', dataset=dataset, dataset_info=dataset_info, return_heatmap=return_heatmap, outputs=output_layer_names) if smoother: pose_results = smoother.smooth(pose_results) # show the results vis_frame = vis_pose_tracking_result(pose_model, cur_frame, pose_results, radius=args.radius, thickness=args.thickness, dataset=dataset, dataset_info=dataset_info, kpt_score_thr=args.kpt_thr, show=False) if args.show: cv2.imshow('Frame', vis_frame) if save_out_video: videoWriter.write(vis_frame) if args.show and cv2.waitKey(1) & 0xFF == ord('q'): break if save_out_video: videoWriter.release() if args.show: cv2.destroyAllWindows()
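

# `collect_multi_frames` gathers the auxiliary frames used for multi-frame
# pose inference. A plausible sketch, assuming the offsets stored in the
# config's `frame_indices_test` are relative to the current frame and that
# online mode must never look at future frames:
import numpy as np


def collect_multi_frames_sketch(video, frame_id, indices, online=False):
    """Collect the frame at frame_id + offset for each offset in `indices`."""
    frames = []
    for idx in indices:
        # clamp to the valid range; in online mode, never go past the
        # current frame
        upper = frame_id if online else len(video) - 1
        frame_idx = int(np.clip(frame_id + idx, 0, upper))
        frames.append(video[frame_idx])
    return frames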