Ejemplo n.º 1
0
def test_tpn():
    """Smoke-test TPN-TSM forwards: train, test, Grad-CAM and dummy."""
    config = get_recognizer_cfg('tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    demo_inputs = generate_recognizer_demo_inputs((1, 8, 3, 224, 224))
    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # Training forward must return a loss dict.
    assert isinstance(recognizer(imgs, gt_labels), dict)

    # Inference forward, one clip batch at a time.
    clip_batches = [img[None, :] for img in imgs]
    with torch.no_grad():
        for clip in clip_batches:
            recognizer(clip, None, return_loss=False)

    # Grad-CAM forward, batched and per clip.
    recognizer(imgs, gradcam=True)
    for clip in clip_batches:
        recognizer(clip, gradcam=True)

    # Dummy forward (used e.g. for complexity measurement).
    with torch.no_grad():
        _recognizer = build_recognizer(config.model)
        if hasattr(_recognizer, 'forward_dummy'):
            _recognizer.forward = _recognizer.forward_dummy
        for clip in clip_batches:
            _recognizer(clip)
Ejemplo n.º 2
0
def test_slowfast():
    """Forward tests for the SlowFast recognizer.

    Covers the training forward (loss dict), the per-clip inference
    forward, Grad-CAM, and the ``max_testing_views`` test-time option.
    On the parrots framework 3D convs are GPU-only, so the CPU path is
    skipped there.
    """
    model, train_cfg, test_cfg = get_recognizer_cfg(
        'slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py')

    recognizer = build_recognizer(model,
                                  train_cfg=train_cfg,
                                  test_cfg=test_cfg)

    # NOTE(review): assumed layout (N, num_clips, C, T, H, W) for '3D'
    # inputs — confirm against generate_recognizer_demo_inputs.
    input_shape = (1, 3, 3, 16, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape, '3D')

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            recognizer = recognizer.cuda()
            imgs = imgs.cuda()
            gt_labels = gt_labels.cuda()
            losses = recognizer(imgs, gt_labels)
            assert isinstance(losses, dict)

            # Test forward test
            with torch.no_grad():
                img_list = [img[None, :] for img in imgs]
                for one_img in img_list:
                    recognizer(one_img, None, return_loss=False)

            # Test forward gradcam
            recognizer(imgs, gradcam=True)
            for one_img in img_list:
                recognizer(one_img, gradcam=True)
    else:
        losses = recognizer(imgs, gt_labels)
        assert isinstance(losses, dict)

        # Test forward test
        with torch.no_grad():
            img_list = [img[None, :] for img in imgs]
            for one_img in img_list:
                recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)

        # Test the feature max_testing_views
        # (presumably caps how many test views are processed at once —
        # verify against the recognizer's test-time logic)
        test_cfg['max_testing_views'] = 1
        recognizer = build_recognizer(model,
                                      train_cfg=train_cfg,
                                      test_cfg=test_cfg)
        with torch.no_grad():
            img_list = [img[None, :] for img in imgs]
            for one_img in img_list:
                recognizer(one_img, None, return_loss=False)
Ejemplo n.º 3
0
def test_tpn():
    """Forward checks for TPN on a TSM (2D) and a SlowOnly (3D) backbone."""

    def _run_forwards(recognizer, demo_inputs):
        # One shared routine: train forward, per-clip test, Grad-CAM.
        imgs = demo_inputs['imgs']
        gt_labels = demo_inputs['gt_labels']

        # Training forward must return a loss dict.
        assert isinstance(recognizer(imgs, gt_labels), dict)

        # Per-clip inference forward.
        clip_batches = [img[None, :] for img in imgs]
        with torch.no_grad():
            for clip in clip_batches:
                recognizer(clip, None, return_loss=False)

        # Grad-CAM forward, batched and per clip.
        recognizer(imgs, gradcam=True)
        for clip in clip_batches:
            recognizer(clip, gradcam=True)

    # TPN on top of TSM (2D input layout).
    model, train_cfg, test_cfg = _get_recognizer_cfg(
        'tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py')
    model['backbone']['pretrained'] = None
    recognizer = build_recognizer(model,
                                  train_cfg=train_cfg,
                                  test_cfg=test_cfg)
    _run_forwards(recognizer, generate_demo_inputs((1, 8, 3, 224, 224)))

    # TPN on top of SlowOnly (3D input layout).
    model, train_cfg, test_cfg = _get_recognizer_cfg(
        'tpn/tpn_slowonly_r50_8x8x1_150e_kinetics_rgb.py')
    model['backbone']['pretrained'] = None
    recognizer = build_recognizer(model,
                                  train_cfg=train_cfg,
                                  test_cfg=test_cfg)
    _run_forwards(recognizer,
                  generate_demo_inputs((1, 8, 3, 1, 224, 224), '3D'))
Ejemplo n.º 4
0
def test_register_module_hooks():
    """Verify register_module_hooks wires normalize hooks at each position."""
    _module_hooks = [
        dict(type='GPUNormalize',
             hooked_module='backbone',
             hook_pos='forward_pre',
             input_format='NCHW',
             mean=[123.675, 116.28, 103.53],
             std=[58.395, 57.12, 57.375])
    ]

    repo_dpath = osp.dirname(osp.dirname(osp.dirname(__file__)))
    config_fpath = osp.join(repo_dpath, 'configs/_base_/models/tsm_r50.py')
    config = mmcv.Config.fromfile(config_fpath)
    config.model['backbone']['pretrained'] = None

    def _hooks(**overrides):
        # Fresh copy of the base hook spec with selected fields overridden.
        hooks = copy.deepcopy(_module_hooks)
        hooks[0].update(overrides)
        return hooks

    # case 1: forward_pre hook on the backbone
    recognizer = build_recognizer(config.model)
    handles = register_module_hooks(recognizer, _hooks(hook_pos='forward_pre'))
    assert recognizer.backbone._forward_pre_hooks[
        handles[0].id].__name__ == 'normalize_hook'

    # case 2: forward hook on the backbone
    recognizer = build_recognizer(config.model)
    handles = register_module_hooks(recognizer, _hooks(hook_pos='forward'))
    assert recognizer.backbone._forward_hooks[
        handles[0].id].__name__ == 'normalize_hook'

    # case 3: backward hook on the classification head
    recognizer = build_recognizer(config.model)
    handles = register_module_hooks(
        recognizer, _hooks(hooked_module='cls_head', hook_pos='backward'))
    assert recognizer.cls_head._backward_hooks[
        handles[0].id].__name__ == 'normalize_hook'

    # case 4: unknown hook position is rejected
    recognizer = build_recognizer(config.model)
    with pytest.raises(ValueError):
        handles = register_module_hooks(recognizer,
                                        _hooks(hook_pos='_other_pos'))

    # case 5: unknown module is rejected
    recognizer = build_recognizer(config.model)
    with pytest.raises(ValueError):
        handles = register_module_hooks(recognizer,
                                        _hooks(hooked_module='_other_module'))
Ejemplo n.º 5
0
def test_tsn():
    """Forward tests for TSN with its default backbone and an mmcls one."""
    config = get_recognizer_cfg('tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    demo_inputs = generate_recognizer_demo_inputs((1, 3, 3, 32, 32))
    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # Training forward must return a loss dict.
    assert isinstance(recognizer(imgs, gt_labels), dict)

    # Per-clip inference forward.
    clip_batches = [img[None, :] for img in imgs]
    with torch.no_grad():
        for clip in clip_batches:
            recognizer(clip, None, return_loss=False)

    # Grad-CAM forward, batched and per clip.
    recognizer(imgs, gradcam=True)
    for clip in clip_batches:
        recognizer(clip, gradcam=True)

    # Repeat training/test forwards with a ResNeXt backbone from mmcls.
    mmcls_backbone = dict(type='mmcls.ResNeXt',
                          depth=101,
                          num_stages=4,
                          out_indices=(3, ),
                          groups=32,
                          width_per_group=4,
                          style='pytorch')
    config.model['backbone'] = mmcls_backbone

    recognizer = build_recognizer(config.model)

    demo_inputs = generate_recognizer_demo_inputs((1, 3, 3, 32, 32))
    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    assert isinstance(recognizer(imgs, gt_labels), dict)

    with torch.no_grad():
        for clip in (img[None, :] for img in imgs):
            recognizer(clip, None, return_loss=False)
Ejemplo n.º 6
0
def do_test(cfg, checkpoint, gpus=1, proc_per_gpu=1, task='verb', logger=None):
    """Run single-GPU inference on the test split and print the raw outputs.

    Args:
        cfg: mmcv-style config with ``data.test``, ``model`` and
            ``test_cfg`` sections. Mutated in place (test_mode, workers).
        checkpoint (str): Checkpoint path; loaded with ``strict=True``.
        gpus (int): Must be 1 — multi-GPU testing is not supported here.
        proc_per_gpu (int): Unused; kept for interface compatibility.
        task (str): Unused; kept for interface compatibility.
        logger: Unused; kept for interface compatibility.
    """
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True
    cfg.data.workers_per_gpu = 16
    # if 'input_size' in cfg.data.test and cfg.data.test.input_size == 256:
    #     cfg.model.spatial_temporal_module.spatial_size = 8

    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    assert gpus == 1, "1 gpu is faster now"
    model = build_recognizer(
        cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    load_checkpoint(model, checkpoint, strict=True)
    # Wrap for single-device data-parallel execution on GPU 0.
    model = MMDataParallel(model, device_ids=[0])

    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        num_gpus=1,
        dist=False,
        shuffle=False)
    outputs = single_test(model, data_loader)

    print("\n---------------")
    print(outputs)
Ejemplo n.º 7
0
def main():
    """Export a recognizer checkpoint to ONNX.

    Builds the model from the CLI config, loads its weights, derives the
    network input size from the config, and converts with
    ``convert_to_onnx``.
    """
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    cfg.data.videos_per_gpu = 1

    net = build_recognizer(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    net.eval()
    load_checkpoint(net, args.checkpoint, force_matching=True)

    # Input is (C, T, H, W); the image size may be an int or a pair.
    input_time_size = cfg.input_clip_length
    input_image_size = (tuple(cfg.input_img_size) if isinstance(
        cfg.input_img_size, (list, tuple)) else
                        (cfg.input_img_size, cfg.input_img_size))
    input_size = (3, input_time_size) + input_image_size

    # Normalize the target path to a .onnx extension.
    output_path = args.output_name
    if not output_path.endswith('.onnx'):
        output_path = '{}.onnx'.format(output_path)

    # Guard against an empty dirname (plain filename in the CWD):
    # makedirs('') would raise.
    base_output_dir = dirname(output_path)
    if base_output_dir and not exists(base_output_dir):
        makedirs(base_output_dir)

    # Export through the inference-only forward when available.
    if hasattr(net, 'forward_inference'):
        net.forward = net.forward_inference

    # Bug fix: pass the normalized .onnx path. Previously the raw
    # args.output_name was passed, leaving the normalization above unused.
    convert_to_onnx(net, input_size, output_path, check=args.check)
Ejemplo n.º 8
0
def main():
    """Build a recognizer from a CLI config, load weights, print conv stats.

    Command-line interface: a positional config path plus optional
    checkpoint paths and ``--update_config`` dict overrides. The collected
    convolution-layer statistics are printed via ``show_stat``.
    """
    parser = ArgumentParser()
    parser.add_argument('config', help='Config file path')
    parser.add_argument('--load_from',
                        help='the checkpoint file to init weights from')
    parser.add_argument('--load2d_from',
                        help='the checkpoint file to init 2D weights from')
    parser.add_argument('--update_config',
                        nargs='+',
                        action=ExtendedDictAction,
                        help='arguments in dict')
    args = parser.parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    # CLI dict overrides are merged before the args-based config update.
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    cfg = update_config(cfg, args)

    net = build_recognizer(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    net.eval()

    if cfg.load_from:
        logger = get_root_logger(log_level=cfg.log_level)
        # strict=False: allow partially-matching checkpoints.
        load_checkpoint(net,
                        cfg.load_from,
                        strict=False,
                        logger=logger,
                        show_converted=True,
                        force_matching=True)

    conv_layers = collect_conv_layers(net)
    show_stat(conv_layers)
Ejemplo n.º 9
0
def main():
    """Measure FLOPs and parameter counts of a recognizer.

    Switches the model, backbone and head configs to their ``*_Inference``
    variants, derives the input size from the test data config, and runs
    ``get_model_complexity_info`` on GPU.
    """
    parser = ArgumentParser(description='Measure number of FLOPs')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('--layer_stat',
                        '-ls',
                        action='store_true',
                        help='Whether to print per layer stat')
    args = parser.parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    cfg.data.videos_per_gpu = 1
    # Select the inference-only registered variants of each component.
    cfg.model.type += '_Inference'
    cfg.model.backbone.type += '_Inference'
    cfg.model.backbone.inference = True
    cfg.model.cls_head.type += '_Inference'

    time_length = cfg.data.test.out_length if hasattr(
        cfg.data.test, 'out_length') else cfg.data.test.new_length
    # NOTE(review): this assumes cfg.data.test.input_size is a tuple —
    # an int here would raise on tuple concatenation. Confirm in configs.
    input_size = (cfg.model.backbone.num_input_layers,
                  time_length) + cfg.data.test.input_size

    with torch.no_grad():
        net = build_recognizer(cfg.model,
                               train_cfg=None,
                               test_cfg=cfg.test_cfg).cuda()

        flops, params = get_model_complexity_info(
            net,
            input_size,
            as_strings=True,
            print_per_layer_stat=args.layer_stat)

        print('Flops:  ' + flops)
        print('Params: ' + params)
Ejemplo n.º 10
0
def test_c3d():
    """Train/test/Grad-CAM forward checks for the C3D recognizer."""
    model, train_cfg, test_cfg = _get_recognizer_cfg(
        'c3d/c3d_sports1m_16x1x1_45e_ucf101_rgb.py')
    model['backbone']['pretrained'] = None

    recognizer = build_recognizer(
        model, train_cfg=train_cfg, test_cfg=test_cfg)

    demo_inputs = generate_demo_inputs((1, 3, 3, 16, 112, 112), '3D')
    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # Training forward must return a loss dict.
    assert isinstance(recognizer(imgs, gt_labels), dict)

    # Per-clip inference forward.
    clip_batches = [img[None, :] for img in imgs]
    with torch.no_grad():
        for clip in clip_batches:
            recognizer(clip, None, return_loss=False)

    # Grad-CAM forward, batched and per clip.
    recognizer(imgs, gradcam=True)
    for clip in clip_batches:
        recognizer(clip, gradcam=True)
Ejemplo n.º 11
0
def test_timesformer():
    """Train/test/Grad-CAM forward checks for the TimeSformer recognizer."""
    config = get_recognizer_cfg(
        'timesformer/timesformer_divST_8x32x1_15e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None
    # Shrink the expected image size to match the tiny demo input.
    config.model['backbone']['img_size'] = 32

    recognizer = build_recognizer(config.model)

    demo_inputs = generate_recognizer_demo_inputs((1, 3, 3, 8, 32, 32), '3D')
    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # Training forward must return a loss dict.
    assert isinstance(recognizer(imgs, gt_labels), dict)

    # Per-clip inference forward.
    clip_batches = [img[None, :] for img in imgs]
    with torch.no_grad():
        for clip in clip_batches:
            recognizer(clip, None, return_loss=False)

    # Grad-CAM forward, batched and per clip.
    recognizer(imgs, gradcam=True)
    for clip in clip_batches:
        recognizer(clip, gradcam=True)
Ejemplo n.º 12
0
    def __init__(
        self,
        rgb_model_path,
        flow_model_path=None,
        rgb_config_path=os.path.join(py_file_root,
                                     'configs/TSN/my_data/tsn_rgb_sknet.py'),
        flow_config_path=os.path.join(py_file_root,
                                      'configs/TSN/my_data/tsn_flow_sknet.py'),
        classind_path=os.path.join(py_file_root,
                                   'data/my_data/annotations/classInd.txt')):
        """Build the RGB recognizer and load the class-index mapping.

        Args:
            rgb_model_path (str): Checkpoint path for the RGB model.
            flow_model_path (str | None): Checkpoint path for the flow
                model. Currently unused — the flow branch is a TODO.
            rgb_config_path (str): mmcv config file for the RGB model.
            flow_config_path (str): mmcv config file for the flow model.
            classind_path (str): Text file with "index name" per line.

        Raises:
            ModuleNotFoundError: If no class-index mapping could be built.
        """
        self.rgb_cfg = mmcv.Config.fromfile(rgb_config_path)
        self.flow_cfg = mmcv.Config.fromfile(flow_config_path)
        self.rgb_model = build_recognizer(self.rgb_cfg.model,
                                          train_cfg=None,
                                          test_cfg=self.rgb_cfg.test_cfg)

        load_checkpoint(self.rgb_model, rgb_model_path, map_location='cpu')
        '''TODO'''
        # self.flow_model = build_recognizer(flow_model_path, train_cfg=None, test_cfg=self.flow_cfg.test_cfg)
        #
        self.classind = None
        with open(classind_path, 'r') as f:
            pairs = (line.strip().split(' ') for line in f)
            self.classind = {int(idx): name for idx, name in pairs}
        # Fix: identity comparison with None (`is`), not equality (PEP 8).
        if self.classind is None:
            raise ModuleNotFoundError("No classind file")
Ejemplo n.º 13
0
def init_recognizer(config, checkpoint=None, device='cuda:0', **kwargs):
    """Initialize a recognizer from config file.

    Args:
        config (str | :obj:`mmcv.Config`): Config file path or the config
            object.
        checkpoint (str | None, optional): Checkpoint path/url. If set to None,
            the model will not load any weights. Default: None.
        device (str | :obj:`torch.device`): The desired device of returned
            tensor. Default: 'cuda:0'.

    Returns:
        nn.Module: The constructed recognizer.
    """
    if 'use_frames' in kwargs:
        warnings.warn('The argument `use_frames` is deprecated PR #1191. '
                      'Now you can use models trained with frames or videos '
                      'arbitrarily. ')

    if not isinstance(config, mmcv.Config):
        if not isinstance(config, str):
            raise TypeError('config must be a filename or Config object, '
                            f'but got {type(config)}')
        config = mmcv.Config.fromfile(config)

    # The checkpoint (if any) supplies the weights, so backbone
    # pretraining is unnecessary.
    config.model.backbone.pretrained = None
    model = build_recognizer(config.model, test_cfg=config.get('test_cfg'))

    if checkpoint is not None:
        load_checkpoint(model, checkpoint, map_location='cpu')
    model.cfg = config
    model.to(device)
    model.eval()
    return model
Ejemplo n.º 14
0
def init_recognizer(config, checkpoint=None, label_file=None, device='cuda:0'):
    """Initialize a recognizer from a config file.

    Args:
        config (str | mmcv.Config): Config file path or loaded config.
        checkpoint (str | None): Checkpoint path/url; if None, no weights
            are loaded.
        label_file (str | None): Optional text file with one class name per
            line, used to populate ``model.CLASSES``.
        device (str | torch.device): Device the model is moved to.

    Returns:
        nn.Module: The constructed recognizer in eval mode, with the
        config stored on ``model.cfg``.

    Raises:
        TypeError: If ``config`` is neither a path nor an mmcv.Config.
    """
    if isinstance(config, str):
        config = mmcv.Config.fromfile(config)
    elif not isinstance(config, mmcv.Config):
        raise TypeError('config must be a filename or Config object, '
                        'but got {}'.format(type(config)))
    # The checkpoint supplies the weights; skip backbone pretraining.
    config.model.backbone.pretrained = None
    config.model.spatial_temporal_module.spatial_size = 8
    model = build_recognizer(config.model,
                             train_cfg=None,
                             test_cfg=config.test_cfg)
    if checkpoint is not None:
        checkpoint = load_checkpoint(model, checkpoint)
        if label_file is not None:
            # Fix: close the label file deterministically (was leaked).
            with open(label_file, 'r') as f:
                model.CLASSES = [line.rstrip() for line in f]
        else:
            if 'CLASSES' in checkpoint['meta']:
                model.CLASSES = checkpoint['meta']['CLASSES']
            else:
                warnings.warn(
                    'Class names are not saved in the checkpoint\'s '
                    'meta data, use something-something-v2 classes by default.'
                )
                # Fix: dataset key was misspelled 'something=something-v2',
                # which contradicted the warning above.
                model.CLASSES = get_classes('something-something-v2')
    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model
Ejemplo n.º 15
0
def test_tsn():
    """Train/test/Grad-CAM forward checks for the TSN recognizer."""
    model, train_cfg, test_cfg = get_recognizer_cfg(
        'tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
    model['backbone']['pretrained'] = None

    recognizer = build_recognizer(model,
                                  train_cfg=train_cfg,
                                  test_cfg=test_cfg)

    demo_inputs = generate_recognizer_demo_inputs((1, 3, 3, 32, 32))
    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # Training forward must return a loss dict.
    assert isinstance(recognizer(imgs, gt_labels), dict)

    # Per-clip inference forward.
    clip_batches = [img[None, :] for img in imgs]
    with torch.no_grad():
        for clip in clip_batches:
            recognizer(clip, None, return_loss=False)

    # Grad-CAM forward, batched and per clip.
    recognizer(imgs, gradcam=True)
    for clip in clip_batches:
        recognizer(clip, gradcam=True)
Ejemplo n.º 16
0
def test_i3d():
    """Forward tests for the I3D recognizer.

    Covers the training forward (loss dict), per-clip inference forward,
    Grad-CAM, and ``forward_dummy`` with and without softmax. On the
    parrots framework 3D convs are GPU-only, so the CPU path is skipped.
    """
    config = get_recognizer_cfg('i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py')
    # Random weights are fine for the test: disable 2D and 3D pretraining.
    config.model['backbone']['pretrained2d'] = False
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    # NOTE(review): assumed layout (N, num_clips, C, T, H, W) for '3D'
    # inputs — confirm against generate_recognizer_demo_inputs.
    input_shape = (1, 3, 3, 8, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape, '3D')

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            recognizer = recognizer.cuda()
            imgs = imgs.cuda()
            gt_labels = gt_labels.cuda()
            losses = recognizer(imgs, gt_labels)
            assert isinstance(losses, dict)

            # Test forward test
            with torch.no_grad():
                img_list = [img[None, :] for img in imgs]
                for one_img in img_list:
                    recognizer(one_img, None, return_loss=False)

            # Test forward gradcam
            recognizer(imgs, gradcam=True)
            for one_img in img_list:
                recognizer(one_img, gradcam=True)

            # Test forward dummy: with softmax the scores must be a
            # valid probability-like range [0, 1].
            recognizer.forward_dummy(imgs, softmax=False)
            res = recognizer.forward_dummy(imgs, softmax=True)[0]
            assert torch.min(res) >= 0
            assert torch.max(res) <= 1

    else:
        losses = recognizer(imgs, gt_labels)
        assert isinstance(losses, dict)

        # Test forward test
        with torch.no_grad():
            img_list = [img[None, :] for img in imgs]
            for one_img in img_list:
                recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)

        # Test forward dummy: softmax output must lie in [0, 1].
        recognizer.forward_dummy(imgs, softmax=False)
        res = recognizer.forward_dummy(imgs, softmax=True)[0]
        assert torch.min(res) >= 0
        assert torch.max(res) <= 1
Ejemplo n.º 17
0
def test_x3d():
    """Grad-CAM smoke test for the X3D recognizer."""
    config = get_recognizer_cfg('x3d/x3d_s_13x6x1_facebook_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None
    recognizer = build_recognizer(config.model)
    # Grad-CAM helpers read the config from the model instance.
    recognizer.cfg = config
    _do_test_3D_models(recognizer, 'backbone/layer4/1/relu',
                       (1, 1, 3, 13, 32, 32))
Ejemplo n.º 18
0
def test_c3d():
    """Grad-CAM smoke test for the C3D recognizer (101 UCF classes)."""
    config = get_recognizer_cfg('c3d/c3d_sports1m_16x1x1_45e_ucf101_rgb.py')
    config.model['backbone']['pretrained'] = None
    recognizer = build_recognizer(config.model)
    # Grad-CAM helpers read the config from the model instance.
    recognizer.cfg = config
    _do_test_3D_models(recognizer, 'backbone/conv5a/activate',
                       (1, 1, 3, 16, 112, 112), 101)
Ejemplo n.º 19
0
def test_tsm():
    """Grad-CAM tests for TSM with the default and a twice-sample test cfg."""
    config = get_cfg('recognition', 'tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None
    target_layer_name = 'backbone/layer4/1/relu'

    # Default test config: 8 segments per sample.
    recognizer = build_recognizer(config.model, test_cfg=config.test_cfg)
    recognizer.cfg = config
    _do_test_2D_models(recognizer, target_layer_name, (1, 8, 3, 32, 32))

    # Twice sample + 3 crops: 2 * 3 * 8 = 48 segments per sample.
    recognizer = build_recognizer(config.model,
                                  test_cfg=dict(average_clips='prob'))
    recognizer.cfg = config
    _do_test_2D_models(recognizer, target_layer_name, (1, 48, 3, 32, 32))
Ejemplo n.º 20
0
def test_tpn():
    """Grad-CAM tests for TPN on TSM (2D) and SlowOnly (3D) backbones."""
    target_layer_name = 'backbone/layer4/1/relu'

    # TPN on a TSM backbone (2D input layout, 174 sth-v1 classes).
    config = _get_cfg('tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py')
    config.model['backbone']['pretrained'] = None
    recognizer = build_recognizer(config.model, test_cfg=config.test_cfg)
    recognizer.cfg = config
    _do_test_2D_models(recognizer, target_layer_name, (1, 8, 3, 32, 32), 174)

    # TPN on a SlowOnly backbone (3D input layout).
    config = _get_cfg('tpn/tpn_slowonly_r50_8x8x1_150e_kinetics_rgb.py')
    config.model['backbone']['pretrained'] = None
    recognizer = build_recognizer(config.model, test_cfg=config.test_cfg)
    recognizer.cfg = config
    _do_test_3D_models(recognizer, target_layer_name, (1, 3, 3, 8, 32, 32))
def test_tsm():
    """Forward tests for TSM, including twice-sample + 3-crop testing."""
    model, train_cfg, test_cfg = _get_recognizer_cfg(
        'tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py')
    model['backbone']['pretrained'] = None

    recognizer = build_recognizer(model,
                                  train_cfg=train_cfg,
                                  test_cfg=test_cfg)

    demo_inputs = generate_demo_inputs((1, 8, 3, 32, 32))
    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    # Training forward must return a loss dict.
    assert isinstance(recognizer(imgs, gt_labels), dict)

    # Per-clip inference forward.
    with torch.no_grad():
        for clip in (img[None, :] for img in imgs):
            recognizer(clip, None, return_loss=False)

    # Twice sample + 3 crops: 48 segments per sample.
    demo_inputs = generate_demo_inputs((2, 48, 3, 32, 32))
    imgs = demo_inputs['imgs']

    recognizer = build_recognizer(model,
                                  train_cfg=train_cfg,
                                  test_cfg=dict(average_clips='prob'))

    # Per-clip inference forward with the prob-averaging test config.
    clip_batches = [img[None, :] for img in imgs]
    with torch.no_grad():
        for clip in clip_batches:
            recognizer(clip, None, return_loss=False)

    # Grad-CAM forward, batched and per clip.
    recognizer(imgs, gradcam=True)
    for clip in clip_batches:
        recognizer(clip, gradcam=True)
Ejemplo n.º 22
0
def test_skeletongcn():
    """Forward tests for STGCN, with and without edge importance weighting."""
    config = get_skeletongcn_cfg('stgcn/stgcn_80e_ntu60_xsub_keypoint.py')
    with pytest.raises(TypeError):
        # "pretrained" must be a str or None
        config.model['backbone']['pretrained'] = ['None']
        recognizer = build_recognizer(config.model)

    def _check_forward(recognizer):
        # Shared routine: training forward then per-sample inference.
        demo_inputs = generate_recognizer_demo_inputs((1, 3, 300, 17, 2),
                                                      'skeleton')
        skeletons = demo_inputs['imgs']
        gt_labels = demo_inputs['gt_labels']

        # Training forward must return a loss dict.
        assert isinstance(recognizer(skeletons, gt_labels), dict)

        # Per-sample inference forward.
        with torch.no_grad():
            for one_skeleton in (s[None, :] for s in skeletons):
                recognizer(one_skeleton, None, return_loss=False)

    # Default STGCN.
    config.model['backbone']['pretrained'] = None
    _check_forward(build_recognizer(config.model))

    # STGCN without edge importance weighting.
    config.model['backbone']['edge_importance_weighting'] = False
    _check_forward(build_recognizer(config.model))
Ejemplo n.º 23
0
def test_tsn():
    """Grad-CAM smoke test for the TSN recognizer."""
    config = get_recognizer_cfg('tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None
    recognizer = build_recognizer(config.model)
    # Grad-CAM helpers read the config from the model instance.
    recognizer.cfg = config

    _do_test_2D_models(recognizer, 'backbone/layer4/1/relu',
                       (1, 25, 3, 32, 32))
Ejemplo n.º 24
0
def test_slowfast():
    """Grad-CAM smoke test for the SlowFast recognizer."""
    config = _get_cfg('slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py')

    recognizer = build_recognizer(config.model, test_cfg=config.test_cfg)
    # Grad-CAM helpers read the config from the model instance.
    recognizer.cfg = config

    _do_test_3D_models(recognizer, 'backbone/slow_path/layer4/1/relu',
                       (1, 1, 3, 32, 32, 32))
Ejemplo n.º 25
0
def test_tin():
    """Grad-CAM smoke test for the TIN recognizer (CUDA required)."""
    config = get_recognizer_cfg(
        'tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)
    # Grad-CAM helpers read the config from the model instance.
    recognizer.cfg = config
    _do_test_2D_models(recognizer,
                       'backbone/layer4/1/relu', (1, 8, 3, 64, 64),
                       device='cuda:0')
Ejemplo n.º 26
0
def tsm(**kwargs):
    """Constructs a tsm model.
    """
    cfg = Config1.fromfile('config_files/sthv2/tsm_baseline.py')
    model = build_recognizer(cfg.model,
                             train_cfg=cfg.train_cfg,
                             test_cfg=cfg.test_cfg)
    print('model', model)
    return model
Ejemplo n.º 27
0
def test_csn():
    """Grad-CAM smoke test for the ir-CSN recognizer."""
    config = get_recognizer_cfg(
        'csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py')
    # Random weights are fine here: disable 2D and 3D pretraining.
    config.model['backbone']['pretrained2d'] = False
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)
    # Grad-CAM helpers read the config from the model instance.
    recognizer.cfg = config
    _do_test_3D_models(recognizer, 'backbone/layer4/1/relu',
                       (1, 1, 3, 32, 32, 32))
Ejemplo n.º 28
0
def main():
    """Train a recognizer from a CLI config.

    Parses CLI args, applies overrides to the config, initializes the
    (optionally distributed) environment and logging, builds the model
    and the train/val datasets listed in ``cfg.workflow``, and hands off
    to ``train_network``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus
    if cfg.checkpoint_config is not None:
        # save mmaction version in checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmact_version=__version__,
                                          config=cfg.text)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_recognizer(cfg.model,
                             train_cfg=cfg.train_cfg,
                             test_cfg=cfg.test_cfg)

    # Build one dataset per workflow phase, in workflow order.
    train_dataset = get_trimmed_dataset(cfg.data.train)
    val_dataset = get_trimmed_dataset(cfg.data.val)
    datasets = []
    for flow in cfg.workflow:
        assert flow[0] in ['train', 'val']
        if flow[0] == 'train':
            datasets.append(train_dataset)
        else:
            datasets.append(val_dataset)
    train_network(model,
                  datasets,
                  cfg,
                  distributed=distributed,
                  validate=args.validate,
                  logger=logger)
Ejemplo n.º 29
0
def test_i3d():
    """Grad-CAM smoke test for the I3D recognizer."""
    config = get_recognizer_cfg('i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py')
    # Random weights are fine here: disable 2D and 3D pretraining.
    config.model['backbone']['pretrained2d'] = False
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)
    # Grad-CAM helpers read the config from the model instance.
    recognizer.cfg = config

    _do_test_3D_models(recognizer, 'backbone/layer4/1/relu',
                       [1, 1, 3, 32, 32, 32])
Ejemplo n.º 30
0
def rgb_based_action_recognition(args):
    """Run the RGB recognizer on a video and return the top prediction."""
    cfg = mmcv.Config.fromfile(args.rgb_config)
    # The checkpoint supplies the weights; skip backbone pretraining.
    cfg.model.backbone.pretrained = None
    model = build_recognizer(cfg.model, test_cfg=cfg.get('test_cfg'))
    load_checkpoint(model, args.rgb_checkpoint, map_location='cpu')
    model.cfg = cfg
    model.to(args.device)
    model.eval()
    results = inference_recognizer(model, args.video, args.label_map)
    # First entry of the first result — the top-ranked action label.
    return results[0][0]