def convert_tao(file, classes):
    """Convert TAO annotations to a COCO-style dict.

    Replaces the categories with `classes`, and adds per-image `frame_id`,
    per-annotation `instance_id`, and per-video `frame_range` fields.
    """
    tao = Tao(file)
    raw = mmcv.load(file)
    out = defaultdict(list)
    out['tracks'] = raw['tracks'].copy()
    out['info'] = raw['info'].copy()
    out['licenses'] = raw['licenses'].copy()
    out['categories'] = classes
    for video in tqdm(raw['videos']):
        img_infos = tao.vid_img_map[video['id']]
        img_infos = sorted(img_infos, key=lambda x: x['frame_index'])
        frame_range = (img_infos[1]['frame_index']
                       - img_infos[0]['frame_index'])
        video['frame_range'] = frame_range
        out['videos'].append(video)
        for i, img_info in enumerate(img_infos):
            img_info['frame_id'] = i
            img_info['neg_category_ids'] = video['neg_category_ids']
            img_info['not_exhaustive_category_ids'] = video[
                'not_exhaustive_category_ids']
            out['images'].append(img_info)
            ann_infos = tao.img_ann_map[img_info['id']]
            for ann_info in ann_infos:
                ann_info['instance_id'] = ann_info['track_id']
                out['annotations'].append(ann_info)
    assert len(out['videos']) == len(raw['videos'])
    assert len(out['images']) == len(raw['images'])
    assert len(out['annotations']) == len(raw['annotations'])
    return out
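# A minimal usage sketch for convert_tao (hedged: 'lvis_classes.json' and the
# output path are hypothetical; the function only requires a TAO annotation
# json and a list of COCO-style category dicts):
#
#   classes = mmcv.load('lvis_classes.json')
#   coco_style = convert_tao('annotations/train.json', classes)
#   mmcv.dump(coco_style, 'annotations/train_cocoformat.json')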
def main():
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--annotations', type=Path, required=True)
    # We need the frames dir because the pickles index boxes into the ordered
    # list of frames.
    parser.add_argument('--frames-dir', type=Path, required=True)
    parser.add_argument('--pickle-dir', type=Path, required=True)
    parser.add_argument('--oracle-category', action='store_true')
    parser.add_argument('--workers', type=int, default=8)
    parser.add_argument('--threshold', default=0.5, type=float)
    parser.add_argument('--vis-cats', nargs='*', type=str)
    parser.add_argument('--videos', nargs='*')
    parser.add_argument('--output-dir', type=Path, required=True)

    args = parser.parse_args()
    args.output_dir.mkdir(exist_ok=True, parents=True)
    common_setup(__file__, args.output_dir, args)

    paths = list(args.pickle_dir.rglob('*.pkl'))
    tao = Tao(args.annotations)
    cats = tao.cats.copy()
    for cat in cats.values():
        if cat['name'] == 'baby':
            cat['name'] = 'person'

    tasks = []
    for p in paths:
        video_name = str(p.relative_to(args.pickle_dir)).split('.pkl')[0]
        if args.videos is not None and video_name not in args.videos:
            continue
        output_video = args.output_dir / f'{video_name}.mp4'
        if output_video.exists():
            continue
        tasks.append({
            'pickle_path': p,
            'video_name': video_name,
            'frames_root': args.frames_dir,
            'cats': cats,
            'vis_cats': args.vis_cats,
            'annotations_json': args.annotations,
            'threshold': args.threshold,
            'output_video': output_video
        })

    if args.workers == 0:
        for task in tqdm(tasks):
            visualize(**task)
    else:
        pool = Pool(args.workers)
        list(tqdm(pool.imap_unordered(visualize_star, tasks),
                  total=len(tasks)))
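# Example invocation (hypothetical script name and paths; --workers 0 runs
# serially, which is useful for debugging):
#
#   python visualize_sot_pickles.py \
#       --annotations data/tao/annotations/validation.json \
#       --frames-dir data/tao/frames \
#       --pickle-dir output/sot/pysot/pickles \
#       --output-dir output/sot/pysot/videos \
#       --threshold 0.5 --workers 8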
def main():
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--annotations', type=Path, required=True)
    # We need the frames dir because the pickles index boxes into the ordered
    # list of frames.
    parser.add_argument('--frames-dir', type=Path, required=True)
    parser.add_argument('--pickle-dir', type=Path, nargs='+', required=True)
    parser.add_argument('--oracle-category', action='store_true')
    parser.add_argument('--output-dir', type=Path, required=True)

    args = parser.parse_args()
    args.output_dir.mkdir(exist_ok=True, parents=True)
    common_setup(__file__, args.output_dir, args)

    tao = Tao(args.annotations)
    create_json(args.pickle_dir, tao, args.frames_dir, args.output_dir,
                args.oracle_category)
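# Example invocation (hypothetical script name and paths; --pickle-dir
# accepts one or more directories here, unlike the visualization driver):
#
#   python create_json_from_pickles.py \
#       --annotations data/tao/annotations/validation.json \
#       --frames-dir data/tao/frames \
#       --pickle-dir output/sot/pysot/pickles \
#       --output-dir output/sot/pysot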
def evaluate(annotations, predictions, cfg, logger=logging.root):
    """
    Args:
        annotations (str, Path, or dict)
        predictions (str, Path, or dict)
        cfg (ConfigNode)
    """
    logger.info(f'Evaluating predictions at path: {predictions}')
    logger.info(f'Using annotations at path: {annotations}')
    verify_config_or_error(cfg)

    if cfg.SUPERCATEGORY_MAP:
        assert not cfg.CATEGORY_AGNOSTIC, (
            '--category-agnostic is not valid if --supercategory-map is '
            'specified.')
        assert not cfg.CATEGORIES, (
            '--categories cannot be specified if --supercategory-map is '
            'specified.')

    if isinstance(annotations, dict):
        tao = annotations
    else:
        with open(annotations, 'r') as f:
            tao = json.load(f)
    # name_to_id = {x['name']: x['id'] for x in tao['categories']}
    merge_categories = Tao._construct_merge_map(tao)
    assert merge_categories
    for ann in tao['annotations'] + tao['tracks']:
        ann['category_id'] = merge_categories.get(ann['category_id'],
                                                  ann['category_id'])
    tao = Tao(tao)

    if cfg.PREDICTIONS_FORMAT == 'json':
        if isinstance(predictions, dict):
            results = predictions
        else:
            with open(predictions, 'r') as f:
                results = json.load(f)
        for x in results:
            x['score'] = float(x['score'])
        if cfg.THRESHOLD >= 0:
            results = [x for x in results if x['score'] >= cfg.THRESHOLD]
    elif cfg.PREDICTIONS_FORMAT in ('mat_dir', 'pickle_dir', 'pkl_dir'):
        detection_format = cfg.PREDICTIONS_FORMAT.split('_')[0]
        results = misc.load_detection_dir_as_results(
            predictions,
            tao.dataset,
            score_threshold=cfg.THRESHOLD,
            detections_format=detection_format,
            show_progress=True)

    invalid_images = {
        x['image_id'] for x in results if x['image_id'] not in tao.imgs
    }
    if invalid_images:
        logger.warning(f'Found invalid image ids: {invalid_images}')
        results = [x for x in results if x['image_id'] not in invalid_images]

    if cfg.CATEGORY_AGNOSTIC:
        for x in results:
            x['category_id'] = 1

    if cfg.SPLIT_CLASS_TRACKS:
        track_id_gen = itertools.count(1)
        unique_track_ids = defaultdict(lambda: next(track_id_gen))
        for x in results:
            x['track_id'] = unique_track_ids[(x['track_id'],
                                              x['category_id'])]

    if cfg.SPLIT_TRACKS:
        last_track_id = itertools.count(
            max(x['track_id'] for x in tao.anns.values()) + 1)
        for x in results:
            x['track_id'] = next(last_track_id)

    for x in results:
        x['category_id'] = merge_categories.get(x['category_id'],
                                                x['category_id'])

    fill_video_ids_inplace(results, tao)
    if cfg.SINGLE_OBJECT.ENABLED:
        update_init_scores_inplace(results, cfg.SINGLE_OBJECT)
    num_updated_tracks = make_track_ids_unique(results)
    if num_updated_tracks:
        logger.info(
            f'Updating {num_updated_tracks} track ids to make them unique.')
    set_track_scores_inplace(results, cfg.TRACK_SCORE_TOP_PERC)
    results = TaoResults(tao, results)

    if cfg.ORACLE.TYPE != 'none':
        results = apply_oracle(tao,
                               results,
                               cfg.ORACLE,
                               cfg.CATEGORY_AGNOSTIC,
                               logger=logger)

    tao_eval = TaoEval(tao, results, iou_3d_type=cfg.IOU_3D_TYPE)
    if cfg.CATEGORY_AGNOSTIC:
        tao_eval.params.use_cats = 0
    if cfg.CATEGORIES:
        if cfg.CATEGORY_AGNOSTIC:
            raise ValueError(
                '--categories and --category-agnostic are mutually exclusive')
        cat_synset_to_id = {x['synset']: x['id'] for x in tao.cats.values()}
        cat_ids = []
        for x in cfg.CATEGORIES:
            if x not in cat_synset_to_id:
                raise ValueError(
                    f'Could not find category synset {x} (specified from '
                    f'--categories)')
            cat_ids.append(cat_synset_to_id[x])
        tao_eval.params.cat_ids = cat_ids
    tao_eval.params.area_rng = [
        x for x, l in zip(tao_eval.params.area_rng,
                          tao_eval.params.area_rng_lbl) if l in cfg.AREA_RNG
    ]
    tao_eval.params.area_rng_lbl = cfg.AREA_RNG
    tao_eval.params.iou_thrs = cfg.EVAL_IOUS
    tao_eval.run()

    eval_info = {'tao_eval': tao_eval}
    if cfg.MOTA.ENABLED:
        from .evaluation_mota import evaluate_mota
        mota_info = evaluate_mota(tao_eval, cfg, logger)
        eval_info['mota_eval'] = mota_info
    return eval_info
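# A minimal usage sketch for evaluate (hedged: `get_cfg()` is a hypothetical
# helper standing in for however this package constructs its yacs-style
# config; the field names mirror those read above):
#
#   cfg = get_cfg()
#   cfg.PREDICTIONS_FORMAT = 'json'
#   cfg.THRESHOLD = -1  # a negative threshold keeps all detections
#   eval_info = evaluate('annotations/validation.json',
#                        'results/track_results.json', cfg)
#   tao_eval = eval_info['tao_eval']  # a TaoEval on which .run() has run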
def main():
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--annotations', type=Path, required=True)
    parser.add_argument('--frames-dir', type=Path, required=True)
    parser.add_argument('--init',
                        choices=['first', 'biggest'],
                        default='first')
    parser.add_argument('--output-dir', type=Path, required=True)
    parser.add_argument('--tracker',
                        choices=['pysot', 'pytrack', 'staple', 'srdcf'],
                        default='pysot')

    pysot_args = parser.add_argument_group('pysot_params')
    pysot_args.add_argument('--pysot-config-file', '--config-file', type=Path)
    pysot_args.add_argument('--pysot-model-path', '--model-path', type=Path)

    pytracking_args = parser.add_argument_group('pytracking_params')
    pytracking_args.add_argument('--pytrack-name')
    pytracking_args.add_argument('--pytrack-param')
    pytracking_args.add_argument(
        '--pytrack-model-path',
        help=('Specify path to model, if different from the one implied by '
              '--pytrack-param.'))

    parser.add_argument('--gpus', default=[0, 1, 2, 3], nargs='*', type=int)
    parser.add_argument('--tasks-per-gpu', default=1, type=int)
    parser.add_argument('--visualize', default=False, type=misc.parse_bool)

    args = parser.parse_args()
    args.output_dir.mkdir(exist_ok=True, parents=True)
    if args.init == 'first':
        common_setup(__file__, args.output_dir, args)
    else:
        common_setup(f'{Path(__file__).stem}_{args.init}', args.output_dir,
                     args)

    _num_threads = 4
    torch.set_num_threads(_num_threads)
    os.environ['OMP_NUM_THREADS'] = str(_num_threads)

    if args.tracker == 'pysot':
        assert args.pysot_config_file is not None
        assert args.pysot_model_path is not None
    elif args.tracker == 'pytrack':
        assert args.pytrack_name is not None
        assert args.pytrack_param is not None
    elif args.tracker in ('staple', 'srdcf'):
        # No tracker-specific arguments needed.
        pass

    tao = Tao(args.annotations)

    video_tracks = defaultdict(list)
    for track_id, track in tao.tracks.items():
        video_tracks[track['video_id']].append(track)

    # List of kwargs passed to track_video().
    track_video_tasks = []
    pickle_output_dir = args.output_dir / 'pickles'
    pickle_output_dir.mkdir(exist_ok=True, parents=True)
    for video_id, tracks in tqdm(video_tracks.items(),
                                 desc='Collecting tasks'):
        video_name = tao.vids[video_id]['name']
        frames_dir = args.frames_dir / video_name
        output = (pickle_output_dir / video_name).with_suffix('.pkl')
        if output.exists():
            logging.info(f'{output} already exists, skipping.')
            continue
        frames = natsorted(fs.glob_ext(frames_dir, fs.IMG_EXTENSIONS))
        if not frames[0].exists():
            # Just check the first frame for efficiency; usually, either all
            # frames will be missing, or all will be available.
            logging.info(f'Frame link {frames[0]} broken for {video_name} in '
                         f'{frames_dir}, skipping.')
            continue
        # Map relative frame path to its index in the ordered frame list.
        frame_indices = {
            str(x.relative_to(args.frames_dir)): i
            for i, x in enumerate(frames)
        }
        # Map track id to
        # {'first_annotated_frame': index, 'sot_init_frame': index,
        #  'init': [x0, y0, w, h]}.
        objects = {}
        for track in tracks:
            annotation = tao.get_single_object_init(track['id'], args.init)
            frame_name = tao.imgs[annotation['image_id']]['file_name']
            init_frame_index = frame_indices[frame_name]
            if args.init == 'first':
                first_frame_index = init_frame_index
            else:
                first_ann = tao.get_kth_annotation(track['id'], 0)
                first_frame_name = tao.imgs[
                    first_ann['image_id']]['file_name']
                first_frame_index = frame_indices[first_frame_name]
            objects[track['id']] = {
                'first_annotated_frame': first_frame_index,
                'sot_init_frame': init_frame_index,
                'init': annotation['bbox'],
            }
        task = {
            'objects': objects,
            'output_pickle': output,
            'frames_dir': frames_dir,
            'visualize': args.visualize
        }
        track_video_tasks.append(task)

    gpus = args.gpus * args.tasks_per_gpu
    if args.tracker == 'pysot':
        tracker_init = {
            'config_file': args.pysot_config_file,
            'model_path': args.pysot_model_path,
        }
    elif args.tracker == 'pytrack':
        tracker_init = {
            'tracker_name': args.pytrack_name,
            'tracker_param': args.pytrack_param,
            'model_path': args.pytrack_model_path
        }
    elif args.tracker in ('staple', 'srdcf'):
        tracker_init = {}

    if track_video_tasks:
        gpus = gpus[:len(track_video_tasks)]
        logging.info(f'Using GPUs: {gpus}')
        if len(gpus) == 1:
            context = {'gpu': gpus[0]}
            init_tracker(
                {
                    'tracker_init': tracker_init,
                    'tracker_type': args.tracker
                }, context)
            for task in tqdm(track_video_tasks):
                task['show_progress'] = True
                track_video_helper(task, context)
        else:
            pool = FixedGpuPool(gpus,
                                initializer=init_tracker,
                                initargs={
                                    'tracker_init': tracker_init,
                                    'tracker_type': args.tracker
                                })
            list(
                tqdm(pool.imap_unordered(track_video_helper,
                                         track_video_tasks),
                     total=len(track_video_tasks)))
    else:
        logging.warning('No tasks found!')

    create_json(pickle_output_dir, tao, args.frames_dir, args.output_dir,
                oracle_category=True)
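# Example invocation for the pysot tracker (hypothetical script name, config,
# and model paths; staple/srdcf need no tracker-specific arguments):
#
#   python track_sot.py \
#       --annotations data/tao/annotations/validation.json \
#       --frames-dir data/tao/frames \
#       --tracker pysot \
#       --pysot-config-file pysot/experiments/siamrpn_r50/config.yaml \
#       --pysot-model-path pysot/experiments/siamrpn_r50/model.pth \
#       --gpus 0 1 2 3 \
#       --output-dir output/sot/pysot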
def visualize(pickle_path, video_name, frames_root, cats, vis_cats,
              annotations_json, threshold, output_video):
    logging.getLogger('tao.toolkit.tao.tao').setLevel(logging.WARN)
    tao = Tao(annotations_json)

    frames_dir = frames_root / video_name
    frame_paths = natsorted(fs.glob_ext(frames_dir, fs.IMG_EXTENSIONS))
    frames = [str(x.relative_to(frames_root)) for x in frame_paths]
    frame_indices = {x: i for i, x in enumerate(frames)}

    with open(pickle_path, 'rb') as f:
        # Map object_id to {'boxes': np.array}.
        tracks = pickle.load(f)
    init_type = tracks.pop('_init_type', 'first')
    if init_type != 'first':
        raise NotImplementedError(
            f'init type "{init_type}" not yet implemented.')

    frame_annotations = defaultdict(list)
    init_frames = {}
    annotation_id_generator = itertools.count()
    for object_id, outputs in tracks.items():
        init = tao.get_kth_annotation(object_id, k=0)
        init_frame = frame_indices[tao.imgs[init['image_id']]['file_name']]
        init_frames[object_id] = init_frame
        boxes = outputs['boxes']
        for i, frame in enumerate(frames[init_frame:]):
            if len(boxes) <= i:
                logging.warning(
                    f'Could not find box for object {object_id} for '
                    f'frame (index: {i}, {frame})')
                continue
            box = boxes[i].tolist()
            if len(box) == 4:
                # Assume full confidence if the tracker output no score.
                box.append(1)
            x0, y0, x1, y1, score = box
            if score < threshold:
                continue
            w, h = x1 - x0 + 1, y1 - y0 + 1
            category = tao.tracks[object_id]['category_id']
            if (vis_cats is not None
                    and tao.cats[category]['name'] not in vis_cats):
                continue
            frame_annotations[frame].append({
                'id': next(annotation_id_generator),
                'track_id': object_id,
                'bbox': [x0, y0, w, h],
                'category_id': category,
                'score': score
            })

    size = Image.open(frame_paths[0]).size
    output_video.parent.mkdir(exist_ok=True, parents=True)
    with video_utils.video_writer(output_video, size=size) as writer:
        color_generator = itertools.cycle(colormap(as_int=True).tolist())
        colors = defaultdict(lambda: next(color_generator))
        for frame in frame_paths:
            image = np.array(Image.open(frame))
            frame_key = str(frame.relative_to(frames_root))
            anns = frame_annotations[frame_key]
            image = vis.overlay_boxes_coco(
                image, anns, colors=[colors[x['track_id']] for x in anns])
            image = vis.overlay_class_coco(image,
                                           anns,
                                           categories=cats,
                                           font_scale=1,
                                           font_thickness=2)
            writer.write_frame(image)
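# The Pool-based branch of the visualization driver maps `visualize_star`
# over task dicts. A minimal sketch, assuming the helper simply unpacks
# keyword arguments (the actual definition lives elsewhere in the repo):
def visualize_star(kwargs):
    return visualize(**kwargs)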