def test_tpn():
    config = get_recognizer_cfg('tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    input_shape = (1, 8, 3, 224, 224)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)

    # Test forward dummy
    with torch.no_grad():
        _recognizer = build_recognizer(config.model)
        img_list = [img[None, :] for img in imgs]
        if hasattr(_recognizer, 'forward_dummy'):
            _recognizer.forward = _recognizer.forward_dummy
            for one_img in img_list:
                _recognizer(one_img)
def test_slowfast():
    model, train_cfg, test_cfg = get_recognizer_cfg(
        'slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py')

    recognizer = build_recognizer(
        model, train_cfg=train_cfg, test_cfg=test_cfg)

    input_shape = (1, 3, 3, 16, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape, '3D')

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            recognizer = recognizer.cuda()
            imgs = imgs.cuda()
            gt_labels = gt_labels.cuda()
            losses = recognizer(imgs, gt_labels)
            assert isinstance(losses, dict)

            # Test forward test
            with torch.no_grad():
                img_list = [img[None, :] for img in imgs]
                for one_img in img_list:
                    recognizer(one_img, None, return_loss=False)

                # Test forward gradcam
                recognizer(imgs, gradcam=True)
                for one_img in img_list:
                    recognizer(one_img, gradcam=True)
    else:
        losses = recognizer(imgs, gt_labels)
        assert isinstance(losses, dict)

        # Test forward test
        with torch.no_grad():
            img_list = [img[None, :] for img in imgs]
            for one_img in img_list:
                recognizer(one_img, None, return_loss=False)

            # Test forward gradcam
            recognizer(imgs, gradcam=True)
            for one_img in img_list:
                recognizer(one_img, gradcam=True)

    # Test the feature max_testing_views
    test_cfg['max_testing_views'] = 1
    recognizer = build_recognizer(
        model, train_cfg=train_cfg, test_cfg=test_cfg)
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)
def test_tpn():
    model, train_cfg, test_cfg = _get_recognizer_cfg(
        'tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py')
    model['backbone']['pretrained'] = None

    recognizer = build_recognizer(
        model, train_cfg=train_cfg, test_cfg=test_cfg)

    input_shape = (1, 8, 3, 224, 224)
    demo_inputs = generate_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)

    model, train_cfg, test_cfg = _get_recognizer_cfg(
        'tpn/tpn_slowonly_r50_8x8x1_150e_kinetics_rgb.py')
    model['backbone']['pretrained'] = None

    recognizer = build_recognizer(
        model, train_cfg=train_cfg, test_cfg=test_cfg)

    input_shape = (1, 8, 3, 1, 224, 224)
    demo_inputs = generate_demo_inputs(input_shape, '3D')

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)
def test_register_module_hooks():
    _module_hooks = [
        dict(
            type='GPUNormalize',
            hooked_module='backbone',
            hook_pos='forward_pre',
            input_format='NCHW',
            mean=[123.675, 116.28, 103.53],
            std=[58.395, 57.12, 57.375])
    ]

    repo_dpath = osp.dirname(osp.dirname(osp.dirname(__file__)))
    config_fpath = osp.join(repo_dpath, 'configs/_base_/models/tsm_r50.py')
    config = mmcv.Config.fromfile(config_fpath)
    config.model['backbone']['pretrained'] = None

    # case 1
    module_hooks = copy.deepcopy(_module_hooks)
    module_hooks[0]['hook_pos'] = 'forward_pre'
    recognizer = build_recognizer(config.model)
    handles = register_module_hooks(recognizer, module_hooks)
    assert recognizer.backbone._forward_pre_hooks[
        handles[0].id].__name__ == 'normalize_hook'

    # case 2
    module_hooks = copy.deepcopy(_module_hooks)
    module_hooks[0]['hook_pos'] = 'forward'
    recognizer = build_recognizer(config.model)
    handles = register_module_hooks(recognizer, module_hooks)
    assert recognizer.backbone._forward_hooks[
        handles[0].id].__name__ == 'normalize_hook'

    # case 3
    module_hooks = copy.deepcopy(_module_hooks)
    module_hooks[0]['hooked_module'] = 'cls_head'
    module_hooks[0]['hook_pos'] = 'backward'
    recognizer = build_recognizer(config.model)
    handles = register_module_hooks(recognizer, module_hooks)
    assert recognizer.cls_head._backward_hooks[
        handles[0].id].__name__ == 'normalize_hook'

    # case 4
    module_hooks = copy.deepcopy(_module_hooks)
    module_hooks[0]['hook_pos'] = '_other_pos'
    recognizer = build_recognizer(config.model)
    with pytest.raises(ValueError):
        handles = register_module_hooks(recognizer, module_hooks)

    # case 5
    module_hooks = copy.deepcopy(_module_hooks)
    module_hooks[0]['hooked_module'] = '_other_module'
    recognizer = build_recognizer(config.model)
    with pytest.raises(ValueError):
        handles = register_module_hooks(recognizer, module_hooks)
def test_tsn():
    config = get_recognizer_cfg('tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)

    mmcls_backbone = dict(
        type='mmcls.ResNeXt',
        depth=101,
        num_stages=4,
        out_indices=(3, ),
        groups=32,
        width_per_group=4,
        style='pytorch')
    config.model['backbone'] = mmcls_backbone

    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)
def do_test(cfg, checkpoint, gpus=1, proc_per_gpu=1, task='verb',
            logger=None):
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True
    cfg.data.workers_per_gpu = 16

    # if 'input_size' in cfg.data.test and cfg.data.test.input_size == 256:
    #     cfg.model.spatial_temporal_module.spatial_size = 8

    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))

    assert gpus == 1, "1 gpu is faster now"
    model = build_recognizer(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    load_checkpoint(model, checkpoint, strict=True)
    model = MMDataParallel(model, device_ids=[0])

    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        num_gpus=1,
        dist=False,
        shuffle=False)
    outputs = single_test(model, data_loader)
    print("\n---------------")
    print(outputs)
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    cfg.data.videos_per_gpu = 1

    net = build_recognizer(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    net.eval()
    load_checkpoint(net, args.checkpoint, force_matching=True)

    input_time_size = cfg.input_clip_length
    input_image_size = (tuple(cfg.input_img_size)
                        if isinstance(cfg.input_img_size, (list, tuple))
                        else (cfg.input_img_size, cfg.input_img_size))
    input_size = (3, input_time_size) + input_image_size

    output_path = args.output_name
    if not output_path.endswith('.onnx'):
        output_path = '{}.onnx'.format(output_path)

    base_output_dir = dirname(output_path)
    if not exists(base_output_dir):
        makedirs(base_output_dir)

    if hasattr(net, 'forward_inference'):
        net.forward = net.forward_inference

    convert_to_onnx(net, input_size, args.output_name, check=args.check)
def main():
    parser = ArgumentParser()
    parser.add_argument('config', help='Config file path')
    parser.add_argument('--load_from',
                        help='the checkpoint file to init weights from')
    parser.add_argument('--load2d_from',
                        help='the checkpoint file to init 2D weights from')
    parser.add_argument('--update_config',
                        nargs='+',
                        action=ExtendedDictAction,
                        help='arguments in dict')
    args = parser.parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    cfg = update_config(cfg, args)

    net = build_recognizer(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    net.eval()

    if cfg.load_from:
        logger = get_root_logger(log_level=cfg.log_level)
        load_checkpoint(net,
                        cfg.load_from,
                        strict=False,
                        logger=logger,
                        show_converted=True,
                        force_matching=True)

    conv_layers = collect_conv_layers(net)
    show_stat(conv_layers)
def main():
    parser = ArgumentParser(description='Measure number of FLOPs')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('--layer_stat', '-ls', action='store_true',
                        help='Whether to print per layer stat')
    args = parser.parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    cfg.data.videos_per_gpu = 1

    cfg.model.type += '_Inference'
    cfg.model.backbone.type += '_Inference'
    cfg.model.backbone.inference = True
    cfg.model.cls_head.type += '_Inference'

    time_length = cfg.data.test.out_length if hasattr(
        cfg.data.test, 'out_length') else cfg.data.test.new_length
    input_size = (cfg.model.backbone.num_input_layers,
                  time_length) + cfg.data.test.input_size

    with torch.no_grad():
        net = build_recognizer(
            cfg.model, train_cfg=None, test_cfg=cfg.test_cfg).cuda()
        flops, params = get_model_complexity_info(
            net, input_size, as_strings=True,
            print_per_layer_stat=args.layer_stat)

    print('Flops: ' + flops)
    print('Params: ' + params)
def test_c3d():
    model, train_cfg, test_cfg = _get_recognizer_cfg(
        'c3d/c3d_sports1m_16x1x1_45e_ucf101_rgb.py')
    model['backbone']['pretrained'] = None

    recognizer = build_recognizer(
        model, train_cfg=train_cfg, test_cfg=test_cfg)

    input_shape = (1, 3, 3, 16, 112, 112)
    demo_inputs = generate_demo_inputs(input_shape, '3D')

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)
def test_timesformer():
    config = get_recognizer_cfg(
        'timesformer/timesformer_divST_8x32x1_15e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None
    config.model['backbone']['img_size'] = 32

    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 3, 8, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape, '3D')

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)
def __init__(self,
             rgb_model_path,
             flow_model_path=None,
             rgb_config_path=os.path.join(
                 py_file_root, 'configs/TSN/my_data/tsn_rgb_sknet.py'),
             flow_config_path=os.path.join(
                 py_file_root, 'configs/TSN/my_data/tsn_flow_sknet.py'),
             classind_path=os.path.join(
                 py_file_root, 'data/my_data/annotations/classInd.txt')):
    self.rgb_cfg = mmcv.Config.fromfile(rgb_config_path)
    self.flow_cfg = mmcv.Config.fromfile(flow_config_path)

    self.rgb_model = build_recognizer(
        self.rgb_cfg.model, train_cfg=None, test_cfg=self.rgb_cfg.test_cfg)
    load_checkpoint(self.rgb_model, rgb_model_path, map_location='cpu')

    # TODO: build and load the optical-flow model as well.
    # self.flow_model = build_recognizer(
    #     flow_model_path, train_cfg=None, test_cfg=self.flow_cfg.test_cfg)

    # self.classind = None
    with open(classind_path, 'r') as f:
        tmp = [x.strip().split(' ') for x in f.readlines()]
        self.classind = {int(i): x for i, x in tmp}
    if self.classind is None:
        raise ModuleNotFoundError("No classind file")
def init_recognizer(config, checkpoint=None, device='cuda:0', **kwargs):
    """Initialize a recognizer from a config file.

    Args:
        config (str | :obj:`mmcv.Config`): Config file path or the config
            object.
        checkpoint (str | None, optional): Checkpoint path/url. If set to
            None, the model will not load any weights. Default: None.
        device (str | :obj:`torch.device`): The desired device of the
            returned model. Default: 'cuda:0'.

    Returns:
        nn.Module: The constructed recognizer.
    """
    if 'use_frames' in kwargs:
        warnings.warn('The argument `use_frames` is deprecated since '
                      'PR #1191. Now you can use models trained with frames '
                      'or videos arbitrarily.')

    if isinstance(config, str):
        config = mmcv.Config.fromfile(config)
    elif not isinstance(config, mmcv.Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')

    # pretrained model is unnecessary since we directly load checkpoint later
    config.model.backbone.pretrained = None
    model = build_recognizer(config.model, test_cfg=config.get('test_cfg'))

    if checkpoint is not None:
        load_checkpoint(model, checkpoint, map_location='cpu')
    model.cfg = config
    model.to(device)
    model.eval()
    return model
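# --- Usage sketch (not part of the original sources) ---
# A minimal, hypothetical example of calling the `init_recognizer` helper
# defined above. The checkpoint path is a placeholder, and the config path
# assumes the mmaction2 repository layout; adjust both to real files before
# running.
import torch

config_file = 'configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py'
checkpoint_file = 'checkpoints/tsn_r50_kinetics400.pth'  # placeholder path

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = init_recognizer(config_file, checkpoint_file, device=device)

# The returned recognizer is already in eval mode and keeps its config on
# `model.cfg`, so it can be handed straight to downstream inference helpers.
assert not model.training
print(type(model).__name__, model.cfg.model.type)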
def init_recognizer(config,
                    checkpoint=None,
                    label_file=None,
                    device='cuda:0'):
    if isinstance(config, str):
        config = mmcv.Config.fromfile(config)
    elif not isinstance(config, mmcv.Config):
        raise TypeError('config must be a filename or Config object, '
                        'but got {}'.format(type(config)))

    config.model.backbone.pretrained = None
    config.model.spatial_temporal_module.spatial_size = 8
    model = build_recognizer(
        config.model, train_cfg=None, test_cfg=config.test_cfg)
    if checkpoint is not None:
        checkpoint = load_checkpoint(model, checkpoint)

    if label_file is not None:
        classes = [
            line.rstrip() for line in open(label_file, 'r').readlines()
        ]
        model.CLASSES = classes
    else:
        if 'CLASSES' in checkpoint['meta']:
            model.CLASSES = checkpoint['meta']['CLASSES']
        else:
            warnings.warn('Class names are not saved in the checkpoint\'s '
                          'meta data, use something-something-v2 classes '
                          'by default.')
            model.CLASSES = get_classes('something-something-v2')

    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model
def test_tsn():
    model, train_cfg, test_cfg = get_recognizer_cfg(
        'tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
    model['backbone']['pretrained'] = None

    recognizer = build_recognizer(
        model, train_cfg=train_cfg, test_cfg=test_cfg)

    input_shape = (1, 3, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)
def test_i3d():
    config = get_recognizer_cfg('i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py')
    config.model['backbone']['pretrained2d'] = False
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 3, 8, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape, '3D')

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            recognizer = recognizer.cuda()
            imgs = imgs.cuda()
            gt_labels = gt_labels.cuda()
            losses = recognizer(imgs, gt_labels)
            assert isinstance(losses, dict)

            # Test forward test
            with torch.no_grad():
                img_list = [img[None, :] for img in imgs]
                for one_img in img_list:
                    recognizer(one_img, None, return_loss=False)

                # Test forward gradcam
                recognizer(imgs, gradcam=True)
                for one_img in img_list:
                    recognizer(one_img, gradcam=True)

                # Test forward dummy
                recognizer.forward_dummy(imgs, softmax=False)
                res = recognizer.forward_dummy(imgs, softmax=True)[0]
                assert torch.min(res) >= 0
                assert torch.max(res) <= 1
    else:
        losses = recognizer(imgs, gt_labels)
        assert isinstance(losses, dict)

        # Test forward test
        with torch.no_grad():
            img_list = [img[None, :] for img in imgs]
            for one_img in img_list:
                recognizer(one_img, None, return_loss=False)

            # Test forward gradcam
            recognizer(imgs, gradcam=True)
            for one_img in img_list:
                recognizer(one_img, gradcam=True)

            # Test forward dummy
            recognizer.forward_dummy(imgs, softmax=False)
            res = recognizer.forward_dummy(imgs, softmax=True)[0]
            assert torch.min(res) >= 0
            assert torch.max(res) <= 1
def test_x3d():
    config = get_recognizer_cfg('x3d/x3d_s_13x6x1_facebook_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)
    recognizer.cfg = config
    input_shape = (1, 1, 3, 13, 32, 32)
    target_layer_name = 'backbone/layer4/1/relu'
    _do_test_3D_models(recognizer, target_layer_name, input_shape)
def test_c3d():
    config = get_recognizer_cfg('c3d/c3d_sports1m_16x1x1_45e_ucf101_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)
    recognizer.cfg = config
    input_shape = (1, 1, 3, 16, 112, 112)
    target_layer_name = 'backbone/conv5a/activate'
    _do_test_3D_models(recognizer, target_layer_name, input_shape, 101)
def test_tsm():
    config = get_cfg('recognition', 'tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None
    target_layer_name = 'backbone/layer4/1/relu'

    # base config
    recognizer = build_recognizer(config.model, test_cfg=config.test_cfg)
    recognizer.cfg = config
    input_shape = (1, 8, 3, 32, 32)
    _do_test_2D_models(recognizer, target_layer_name, input_shape)

    # test twice sample + 3 crops, 2*3*8=48
    test_cfg = dict(average_clips='prob')
    recognizer = build_recognizer(config.model, test_cfg=test_cfg)
    recognizer.cfg = config
    input_shape = (1, 48, 3, 32, 32)
    _do_test_2D_models(recognizer, target_layer_name, input_shape)
def test_tpn():
    target_layer_name = 'backbone/layer4/1/relu'

    config = _get_cfg('tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model, test_cfg=config.test_cfg)
    recognizer.cfg = config
    input_shape = (1, 8, 3, 32, 32)
    _do_test_2D_models(recognizer, target_layer_name, input_shape, 174)

    config = _get_cfg('tpn/tpn_slowonly_r50_8x8x1_150e_kinetics_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model, test_cfg=config.test_cfg)
    recognizer.cfg = config
    input_shape = (1, 3, 3, 8, 32, 32)
    _do_test_3D_models(recognizer, target_layer_name, input_shape)
def test_tsm():
    model, train_cfg, test_cfg = _get_recognizer_cfg(
        'tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py')
    model['backbone']['pretrained'] = None

    recognizer = build_recognizer(
        model, train_cfg=train_cfg, test_cfg=test_cfg)

    input_shape = (1, 8, 3, 32, 32)
    demo_inputs = generate_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

    # test twice sample + 3 crops
    input_shape = (2, 48, 3, 32, 32)
    demo_inputs = generate_demo_inputs(input_shape)
    imgs = demo_inputs['imgs']

    test_cfg = dict(average_clips='prob')
    recognizer = build_recognizer(
        model, train_cfg=train_cfg, test_cfg=test_cfg)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)
def test_skeletongcn():
    config = get_skeletongcn_cfg('stgcn/stgcn_80e_ntu60_xsub_keypoint.py')
    with pytest.raises(TypeError):
        # "pretrained" must be a str or None
        config.model['backbone']['pretrained'] = ['None']
        recognizer = build_recognizer(config.model)

    config.model['backbone']['pretrained'] = None
    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 300, 17, 2)
    demo_inputs = generate_recognizer_demo_inputs(input_shape, 'skeleton')

    skeletons = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(skeletons, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        skeleton_list = [skeleton[None, :] for skeleton in skeletons]
        for one_skeleton in skeleton_list:
            recognizer(one_skeleton, None, return_loss=False)

    # test stgcn without edge importance weighting
    config.model['backbone']['edge_importance_weighting'] = False
    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 300, 17, 2)
    demo_inputs = generate_recognizer_demo_inputs(input_shape, 'skeleton')

    skeletons = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(skeletons, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        skeleton_list = [skeleton[None, :] for skeleton in skeletons]
        for one_skeleton in skeleton_list:
            recognizer(one_skeleton, None, return_loss=False)
def test_tsn():
    config = get_recognizer_cfg('tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)
    recognizer.cfg = config
    input_shape = (1, 25, 3, 32, 32)
    target_layer_name = 'backbone/layer4/1/relu'
    _do_test_2D_models(recognizer, target_layer_name, input_shape)
def test_slowfast():
    config = _get_cfg('slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py')

    recognizer = build_recognizer(config.model, test_cfg=config.test_cfg)
    recognizer.cfg = config
    input_shape = (1, 1, 3, 32, 32, 32)
    target_layer_name = 'backbone/slow_path/layer4/1/relu'
    _do_test_3D_models(recognizer, target_layer_name, input_shape)
def test_tin():
    config = get_recognizer_cfg(
        'tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None
    target_layer_name = 'backbone/layer4/1/relu'

    recognizer = build_recognizer(config.model)
    recognizer.cfg = config
    input_shape = (1, 8, 3, 64, 64)
    _do_test_2D_models(
        recognizer, target_layer_name, input_shape, device='cuda:0')
def tsm(**kwargs):
    """Constructs a TSM model."""
    tsmconfig = 'config_files/sthv2/tsm_baseline.py'
    cfg = Config1.fromfile(tsmconfig)
    model = build_recognizer(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    print('model', model)
    return model
def test_csn():
    config = get_recognizer_cfg(
        'csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py')
    config.model['backbone']['pretrained2d'] = False
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)
    recognizer.cfg = config
    input_shape = (1, 1, 3, 32, 32, 32)
    target_layer_name = 'backbone/layer4/1/relu'
    _do_test_3D_models(recognizer, target_layer_name, input_shape)
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus
    if cfg.checkpoint_config is not None:
        # save mmaction version in checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmact_version=__version__, config=cfg.text)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_recognizer(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    train_dataset = get_trimmed_dataset(cfg.data.train)
    val_dataset = get_trimmed_dataset(cfg.data.val)
    datasets = []
    for flow in cfg.workflow:
        assert flow[0] in ['train', 'val']
        if flow[0] == 'train':
            datasets.append(train_dataset)
        else:
            datasets.append(val_dataset)

    train_network(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger)
def test_i3d():
    config = get_recognizer_cfg('i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py')
    config.model['backbone']['pretrained2d'] = False
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)
    recognizer.cfg = config
    input_shape = (1, 1, 3, 32, 32, 32)
    target_layer_name = 'backbone/layer4/1/relu'
    _do_test_3D_models(recognizer, target_layer_name, input_shape)
def rgb_based_action_recognition(args):
    rgb_config = mmcv.Config.fromfile(args.rgb_config)
    rgb_config.model.backbone.pretrained = None
    rgb_model = build_recognizer(
        rgb_config.model, test_cfg=rgb_config.get('test_cfg'))
    load_checkpoint(rgb_model, args.rgb_checkpoint, map_location='cpu')
    rgb_model.cfg = rgb_config
    rgb_model.to(args.device)
    rgb_model.eval()
    action_results = inference_recognizer(rgb_model, args.video,
                                          args.label_map)
    rgb_action_result = action_results[0][0]
    return rgb_action_result