Example #1
0
def test_bmn():
    """Test the BMN localizer forward in train and test mode.

    The same checks run on GPU (when available) and CPU; only the device
    placement and the ground-truth container type differ, so the shared
    logic is written once instead of being duplicated per branch.
    """
    model_cfg = dict(type='BMN',
                     temporal_dim=100,
                     boundary_ratio=0.5,
                     num_samples=32,
                     num_samples_per_bin=3,
                     feat_dim=400,
                     soft_nms_alpha=0.4,
                     soft_nms_low_threshold=0.5,
                     soft_nms_high_threshold=0.9,
                     post_process_top_k=100)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        localizer_bmn = build_localizer(model_cfg).cuda()
        raw_feature = torch.rand(8, 400, 100).cuda()
        # NOTE(review): the GPU branch historically feeds numpy ground
        # truth while the CPU branch feeds a tensor — kept as-is.
        gt_bbox = np.array([[[0.1, 0.3], [0.375, 0.625]]] * 8)
    else:
        localizer_bmn = build_localizer(model_cfg)
        raw_feature = torch.rand(8, 400, 100)
        gt_bbox = torch.Tensor([[[0.1, 0.3], [0.375, 0.625]]] * 8)

    # Test forward train
    losses = localizer_bmn(raw_feature, gt_bbox)
    assert isinstance(losses, dict)

    # Test forward test
    video_meta = [
        dict(video_name='v_test',
             duration_second=100,
             duration_frame=960,
             feature_frame=960)
    ]
    one_raw_feature = torch.rand(1, 400, 100)
    if use_cuda:
        one_raw_feature = one_raw_feature.cuda()
    with torch.no_grad():
        localizer_bmn(one_raw_feature,
                      gt_bbox=None,
                      video_meta=video_meta,
                      return_loss=False)
Example #2
0
def test_bmn():
    """Test the BMN localizer (built from its config file) in train and
    test mode.

    Runs on GPU when available and CPU otherwise; the shared assertions
    are written once instead of being duplicated in both branches.
    """
    model_cfg = get_localizer_cfg(
        'bmn/bmn_400x100_2x8_9e_activitynet_feature.py')

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        localizer_bmn = build_localizer(model_cfg.model).cuda()
        raw_feature = torch.rand(8, 400, 100).cuda()
        # NOTE(review): the GPU branch historically feeds numpy ground
        # truth while the CPU branch feeds a tensor — kept as-is.
        gt_bbox = np.array([[[0.1, 0.3], [0.375, 0.625]]] * 8)
    else:
        localizer_bmn = build_localizer(model_cfg.model)
        raw_feature = torch.rand(8, 400, 100)
        gt_bbox = torch.Tensor([[[0.1, 0.3], [0.375, 0.625]]] * 8)

    # Test forward train
    losses = localizer_bmn(raw_feature, gt_bbox)
    assert isinstance(losses, dict)

    # Test forward test
    video_meta = [
        dict(video_name='v_test',
             duration_second=100,
             duration_frame=960,
             feature_frame=960)
    ]
    one_raw_feature = torch.rand(1, 400, 100)
    if use_cuda:
        one_raw_feature = one_raw_feature.cuda()
    with torch.no_grad():
        localizer_bmn(one_raw_feature,
                      gt_bbox=None,
                      video_meta=video_meta,
                      return_loss=False)
Example #3
0
def test_config_build_localizer():
    """Test that all mmaction models defined in the configs can be
    initialized."""
    # every config under `configs/localization` must build cleanly
    for config_fpath in _get_config_path_for_localizer():
        config_mod = mmcv.Config.fromfile(config_fpath)
        print(f'Building localizer, config_fpath = {config_fpath!r}')
        if config_mod.get('model', None):
            assert isinstance(build_localizer(config_mod.model), nn.Module)
Example #4
0
def test_tem():
    """Check the BSN-TEM localizer (built from its config file) in both
    train and test mode."""
    model_cfg = get_localizer_cfg(
        'bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py')

    localizer_tem = build_localizer(model_cfg.model)
    raw_feature = torch.rand(8, 400, 100)
    gt_bbox = torch.Tensor([[[1.0, 3.0], [3.0, 5.0]]] * 8)
    assert isinstance(localizer_tem(raw_feature, gt_bbox), dict)

    # Test forward test
    video_meta = [{'video_name': 'v_test'}]
    with torch.no_grad():
        for sample in raw_feature:
            localizer_tem(sample.reshape(1, 400, 100),
                          video_meta=video_meta,
                          return_loss=False)
Example #5
0
def test_pem():
    """Check the PEM localizer in both train and test mode."""
    # NOTE(review): pem_low_temporal_iou_threshold=2.2 is outside the
    # valid IoU range [0, 1] and above the high threshold (0.6) — looks
    # like a typo for 0.2; confirm before relying on this fixture.
    model_cfg = dict(
        type='PEM',
        pem_feat_dim=32,
        pem_hidden_dim=256,
        pem_u_ratio_m=1,
        pem_u_ratio_l=2,
        pem_high_temporal_iou_threshold=0.6,
        pem_low_temporal_iou_threshold=2.2,
        soft_nms_alpha=0.75,
        soft_nms_low_threshold=0.65,
        soft_nms_high_threshold=0.9,
        post_process_top_k=100)

    localizer_pem = build_localizer(model_cfg)
    bsp_feature = torch.rand(8, 100, 32)
    reference_temporal_iou = torch.rand(8, 100)
    assert isinstance(
        localizer_pem(bsp_feature, reference_temporal_iou), dict)

    # Test forward test
    tmin, tmax = torch.rand(100), torch.rand(100)
    tmin_score, tmax_score = torch.rand(100), torch.rand(100)

    video_meta = [
        dict(video_name='v_test',
             duration_second=100,
             duration_frame=1000,
             annotations=[{
                 'segment': [0.3, 0.6],
                 'label': 'Rock climbing'
             }],
             feature_frame=900)
    ]
    with torch.no_grad():
        for sample in bsp_feature:
            localizer_pem(sample.reshape(1, 100, 32),
                          tmin=tmin,
                          tmax=tmax,
                          tmin_score=tmin_score,
                          tmax_score=tmax_score,
                          video_meta=video_meta,
                          return_loss=False)
Example #6
0
def main():
    """Train a temporal action localizer from an mmcv config file.

    Workflow: parse CLI args, fold the overrides into the config,
    initialise the (optionally distributed) runtime and logger, then
    build the localizer and launch training.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus
    if cfg.checkpoint_config is not None:
        # save mmaction version in checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmact_version=__version__,
                                          config=cfg.text)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    # build the model; train/test behaviour comes entirely from the config
    model = build_localizer(cfg.model,
                            train_cfg=cfg.train_cfg,
                            test_cfg=cfg.test_cfg)

    train_dataset = get_trimmed_dataset(cfg.data.train)
    train_network(model,
                  train_dataset,
                  cfg,
                  distributed=distributed,
                  validate=args.validate,
                  logger=logger)
Example #7
0
def test_tem():
    """Check the TEM localizer in both train and test mode."""
    model_cfg = dict(
        type='TEM',
        temporal_dim=100,
        boundary_ratio=0.1,
        tem_feat_dim=400,
        tem_hidden_dim=512,
        tem_match_threshold=0.5)

    localizer_tem = build_localizer(model_cfg)
    raw_feature = torch.rand(8, 400, 100)
    gt_bbox = torch.Tensor([[[1.0, 3.0], [3.0, 5.0]]] * 8)
    assert isinstance(localizer_tem(raw_feature, gt_bbox), dict)

    # Test forward test
    video_meta = [{'video_name': 'v_test'}]
    with torch.no_grad():
        for sample in raw_feature:
            localizer_tem(sample.reshape(1, 400, 100),
                          video_meta=video_meta,
                          return_loss=False)
Example #8
0
def test_pem():
    """Check the BSN-PEM localizer (built from its config file) in both
    train and test mode."""
    model_cfg = get_localizer_cfg(
        'bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py')

    localizer_pem = build_localizer(model_cfg.model)
    bsp_feature = torch.rand(8, 100, 32)
    reference_temporal_iou = torch.rand(8, 100)
    assert isinstance(
        localizer_pem(bsp_feature, reference_temporal_iou), dict)

    # Test forward test
    tmin, tmax = torch.rand(100), torch.rand(100)
    tmin_score, tmax_score = torch.rand(100), torch.rand(100)

    video_meta = [
        dict(video_name='v_test',
             duration_second=100,
             duration_frame=1000,
             annotations=[{
                 'segment': [0.3, 0.6],
                 'label': 'Rock climbing'
             }],
             feature_frame=900)
    ]
    with torch.no_grad():
        for sample in bsp_feature:
            localizer_pem(sample.reshape(1, 100, 32),
                          tmin=tmin,
                          tmax=tmax,
                          tmin_score=tmin_score,
                          tmax_score=tmax_score,
                          video_meta=video_meta,
                          return_loss=False)
Example #9
0
def test_ssn_test():
    """Test SSN forward in test mode under several configurations:
    avg/max spatial pooling, no regression, tuple STPP stage, and an
    invalid string stage (which must raise ValueError at build time).
    """
    test_cfg = mmcv.ConfigDict(
        dict(ssn=dict(sampler=dict(test_interval=6, batch_size=16),
                      evaluater=dict(top_k=2000,
                                     nms=0.2,
                                     softmax_before_filter=True,
                                     cls_score_dict=None,
                                     cls_top_k=2))))
    base_model_cfg = dict(type='SSN',
                          backbone=dict(type='ResNet',
                                        pretrained=None,
                                        depth=18,
                                        norm_eval=True),
                          spatial_type='avg',
                          dropout_ratio=0.8,
                          cls_head=dict(type='SSNHead',
                                        dropout_ratio=0.,
                                        in_channels=512,
                                        num_classes=20,
                                        consensus=dict(type='STPPTest',
                                                       stpp_stage=(1, 1, 1)),
                                        use_regression=True),
                          test_cfg=test_cfg)
    # variant configs derived from the base one
    maxpool_model_cfg = copy.deepcopy(base_model_cfg)
    maxpool_model_cfg['spatial_type'] = 'max'
    non_regression_cfg = copy.deepcopy(base_model_cfg)
    non_regression_cfg['cls_head']['use_regression'] = False
    non_regression_cfg['cls_head']['consensus']['use_regression'] = False
    tuple_stage_cfg = copy.deepcopy(base_model_cfg)
    tuple_stage_cfg['cls_head']['consensus']['stpp_stage'] = (1, (1, 2), 1)
    str_stage_cfg = copy.deepcopy(base_model_cfg)
    str_stage_cfg['cls_head']['consensus']['stpp_stage'] = ('error', )

    # dummy inputs for the test-mode forward call
    imgs = torch.rand(1, 8, 3, 224, 224)
    relative_proposal_list = torch.Tensor([[[0.2500, 0.6250], [0.3750,
                                                               0.7500]]])
    scale_factor_list = torch.Tensor([[[1.0000, 1.0000], [1.0000, 0.2661]]])
    proposal_tick_list = torch.LongTensor([[[1, 2, 5, 7], [20, 30, 60, 80]]])
    reg_norm_consts = torch.Tensor([[[-0.0603, 0.0325], [0.0752, 0.1596]]])

    localizer_ssn = build_localizer(base_model_cfg)
    localizer_ssn_maxpool = build_localizer(maxpool_model_cfg)
    localizer_ssn_non_regression = build_localizer(non_regression_cfg)
    localizer_ssn_tuple_stage_cfg = build_localizer(tuple_stage_cfg)
    # a string stpp_stage is invalid and must be rejected at build time
    with pytest.raises(ValueError):
        build_localizer(str_stage_cfg)

    if torch.cuda.is_available():
        localizer_ssn = localizer_ssn.cuda()
        localizer_ssn_maxpool = localizer_ssn_maxpool.cuda()
        localizer_ssn_non_regression = localizer_ssn_non_regression.cuda()
        localizer_ssn_tuple_stage_cfg = localizer_ssn_tuple_stage_cfg.cuda()
        imgs = imgs.cuda()
        relative_proposal_list = relative_proposal_list.cuda()
        scale_factor_list = scale_factor_list.cuda()
        proposal_tick_list = proposal_tick_list.cuda()
        reg_norm_consts = reg_norm_consts.cuda()

    with torch.no_grad():
        # Test normal case
        localizer_ssn(imgs,
                      relative_proposal_list=relative_proposal_list,
                      scale_factor_list=scale_factor_list,
                      proposal_tick_list=proposal_tick_list,
                      reg_norm_consts=reg_norm_consts,
                      return_loss=False)

        # Test SSN model with max spatial pooling
        localizer_ssn_maxpool(imgs,
                              relative_proposal_list=relative_proposal_list,
                              scale_factor_list=scale_factor_list,
                              proposal_tick_list=proposal_tick_list,
                              reg_norm_consts=reg_norm_consts,
                              return_loss=False)

        # Test SSN model without regression
        localizer_ssn_non_regression(
            imgs,
            relative_proposal_list=relative_proposal_list,
            scale_factor_list=scale_factor_list,
            proposal_tick_list=proposal_tick_list,
            reg_norm_consts=reg_norm_consts,
            return_loss=False)

        # Test SSN model with tuple stage cfg.
        localizer_ssn_tuple_stage_cfg(
            imgs,
            relative_proposal_list=relative_proposal_list,
            scale_factor_list=scale_factor_list,
            proposal_tick_list=proposal_tick_list,
            reg_norm_consts=reg_norm_consts,
            return_loss=False)
Example #10
0
def test_ssn_train():
    """Test SSN forward in train mode under three configurations:
    the base model, dropout moved from the model into the head, and a
    head without location regression. Each call must return a loss dict.
    """
    train_cfg = mmcv.ConfigDict(
        dict(ssn=dict(assigner=dict(positive_iou_threshold=0.7,
                                    background_iou_threshold=0.01,
                                    incomplete_iou_threshold=0.3,
                                    background_coverage_threshold=0.02,
                                    incomplete_overlap_threshold=0.01),
                      sampler=dict(num_per_video=8,
                                   positive_ratio=1,
                                   background_ratio=1,
                                   incomplete_ratio=6,
                                   add_gt_as_proposals=True),
                      loss_weight=dict(comp_loss_weight=0.1,
                                       reg_loss_weight=0.1),
                      debug=False)))
    base_model_cfg = dict(type='SSN',
                          backbone=dict(type='ResNet',
                                        pretrained=None,
                                        depth=18,
                                        norm_eval=True),
                          spatial_type='avg',
                          dropout_ratio=0.8,
                          loss_cls=dict(type='SSNLoss'),
                          cls_head=dict(type='SSNHead',
                                        dropout_ratio=0.,
                                        in_channels=512,
                                        num_classes=20,
                                        consensus=dict(type='STPPTrain',
                                                       stpp_stage=(1, 1, 1),
                                                       num_segments_list=(2, 5,
                                                                          2)),
                                        use_regression=True),
                          train_cfg=train_cfg)
    # variant: dropout in the head instead of the model body
    dropout_cfg = copy.deepcopy(base_model_cfg)
    dropout_cfg['dropout_ratio'] = 0
    dropout_cfg['cls_head']['dropout_ratio'] = 0.5
    # variant: no location regression
    non_regression_cfg = copy.deepcopy(base_model_cfg)
    non_regression_cfg['cls_head']['use_regression'] = False

    # dummy proposal fixtures for one video with 8 proposals
    imgs = torch.rand(1, 8, 9, 3, 224, 224)
    proposal_scale_factor = torch.Tensor([[[1.0345, 1.0345], [1.0028, 0.0028],
                                           [1.0013, 1.0013], [1.0008, 1.0008],
                                           [0.3357, 1.0006], [1.0006, 1.0006],
                                           [0.0818, 1.0005], [1.0030,
                                                              1.0030]]])
    proposal_type = torch.Tensor([[0, 1, 1, 1, 1, 1, 1, 2]])
    proposal_labels = torch.LongTensor([[8, 8, 8, 8, 8, 8, 8, 0]])
    reg_targets = torch.Tensor([[[0.2929, 0.2694], [0.0000, 0.0000],
                                 [0.0000, 0.0000], [0.0000, 0.0000],
                                 [0.0000, 0.0000], [0.0000, 0.0000],
                                 [0.0000, 0.0000], [0.0000, 0.0000]]])

    localizer_ssn = build_localizer(base_model_cfg)
    localizer_ssn_dropout = build_localizer(dropout_cfg)
    localizer_ssn_non_regression = build_localizer(non_regression_cfg)

    if torch.cuda.is_available():
        localizer_ssn = localizer_ssn.cuda()
        localizer_ssn_dropout = localizer_ssn_dropout.cuda()
        localizer_ssn_non_regression = localizer_ssn_non_regression.cuda()
        imgs = imgs.cuda()
        proposal_scale_factor = proposal_scale_factor.cuda()
        proposal_type = proposal_type.cuda()
        proposal_labels = proposal_labels.cuda()
        reg_targets = reg_targets.cuda()

    # Train normal case
    losses = localizer_ssn(imgs,
                           proposal_scale_factor=proposal_scale_factor,
                           proposal_type=proposal_type,
                           proposal_labels=proposal_labels,
                           reg_targets=reg_targets)
    assert isinstance(losses, dict)

    # Train SSN without dropout in model, with dropout in head
    losses = localizer_ssn_dropout(imgs,
                                   proposal_scale_factor=proposal_scale_factor,
                                   proposal_type=proposal_type,
                                   proposal_labels=proposal_labels,
                                   reg_targets=reg_targets)
    assert isinstance(losses, dict)

    # Train SSN model without regression
    losses = localizer_ssn_non_regression(
        imgs,
        proposal_scale_factor=proposal_scale_factor,
        proposal_type=proposal_type,
        proposal_labels=proposal_labels,
        reg_targets=reg_targets)
    assert isinstance(losses, dict)
Example #11
0
def main():
    """Test an SSN localizer and optionally evaluate temporal detection.

    Builds the model from the config (reorganizing the STPP consensus for
    inference), runs single- or multi-GPU testing, dumps raw outputs to
    ``--out`` and, when ``--eval`` is given, reports mean AP over the
    dataset-specific IoU range.
    """
    args = parse_args()

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True

    # reorganize stpp: replace the training consensus with the
    # inference-time reorganized form and derive its feature layout
    num_classes = (cfg.model.cls_head.num_classes -
                   1 if cfg.model.cls_head.with_bg else
                   cfg.model.cls_head.num_classes)
    stpp_feat_multiplier = 0
    for stpp_subcfg in cfg.model.segmental_consensus.stpp_cfg:
        _, mult = parse_stage_config(stpp_subcfg)
        stpp_feat_multiplier += mult
    cfg.model.segmental_consensus = dict(
        type="STPPReorganized",
        standalong_classifier=cfg.model.segmental_consensus.
        standalong_classifier,
        feat_dim=num_classes + 1 + num_classes * 3 * stpp_feat_multiplier,
        act_score_len=num_classes + 1,
        comp_score_len=num_classes,
        reg_score_len=num_classes * 2,
        stpp_cfg=cfg.model.segmental_consensus.stpp_cfg)

    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    if args.gpus == 1:
        model = build_localizer(cfg.model,
                                train_cfg=None,
                                test_cfg=cfg.test_cfg)
        load_checkpoint(model, args.checkpoint, strict=True)
        model = MMDataParallel(model, device_ids=[0])

        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            num_gpus=1,
            dist=False,
            shuffle=False)
        outputs = single_test(model, data_loader)
    else:
        # multi-GPU: each worker instantiates the model from cfg itself
        model_args = cfg.model.copy()
        model_args.update(train_cfg=None, test_cfg=cfg.test_cfg)
        model_type = getattr(localizers, model_args.pop('type'))
        outputs = parallel_test(model_type,
                                model_args,
                                args.checkpoint,
                                dataset,
                                _data_func,
                                range(args.gpus),
                                workers_per_gpu=args.proc_per_gpu)

    if args.out:
        print('writing results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)

    eval_type = args.eval
    if eval_type:
        print('Starting evaluate {}'.format(eval_type))

        detections = results2det(dataset, outputs,
                                 **cfg.test_cfg.ssn.evaluater)

        if not args.no_regression:
            print("Performing location regression")
            for cls in range(len(detections)):
                detections[cls] = {
                    k: perform_regression(v)
                    for k, v in detections[cls].items()
                }
            print("Regression finished")

        print("Performing NMS")
        for cls in range(len(detections)):
            detections[cls] = {
                k: temporal_nms(v, cfg.test_cfg.ssn.evaluater.nms)
                for k, v in detections[cls].items()
            }
        print("NMS finished")

        if eval_type == 'activitynet':
            iou_range = np.arange(0.5, 1.0, 0.05)
        elif eval_type == 'thumos14':
            iou_range = np.arange(0.1, 1.0, .1)
        else:
            # Fail fast on an unknown dataset instead of crashing later
            # with a NameError when `iou_range` is first used.
            raise ValueError('Unsupported eval type: {}'.format(eval_type))

        # get gt
        all_gt = pd.DataFrame(dataset.get_all_gt(),
                              columns=['video-id', 'cls', 't-start', 't-end'])
        gt_by_cls = [
            all_gt[all_gt.cls == cls].reset_index(drop=True).drop('cls', 1)
            for cls in range(len(detections))
        ]
        plain_detections = [
            det2df(detections, cls) for cls in range(len(detections))
        ]
        ap_values = eval_ap_parallel(plain_detections, gt_by_cls, iou_range)
        map_iou = ap_values.mean(axis=0)
        print("Evaluation finished")

        # display
        display_title = 'Temporal detection performance ({})'.format(args.eval)
        display_data = [['IoU thresh'], ['mean AP']]

        for i in range(len(iou_range)):
            display_data[0].append('{:.02f}'.format(iou_range[i]))
            display_data[1].append('{:.04f}'.format(map_iou[i]))
        table = AsciiTable(display_data, display_title)
        table.justify_columns[-1] = 'right'
        table.inner_footing_row_border = True
        print(table.table)