예제 #1
0
    def _construct_network(self, cfg):
        """
        Assemble the single pathway R(2+1)D network and its head activation.

        The network returned by `create_r2plus1d` is stored on `self.model`,
        and the result of `get_head_act(cfg.MODEL.HEAD_ACT)` is stored on
        `self.post_act`.

        Args:
            cfg (CfgNode): model building configs, details are in the
                comments of the config file.
        """
        # Every per-stage setting below is replicated across the four stages.
        n_stages = 4
        unit_3d = (1, 1, 1)
        cube_3d = (3, 3, 3)

        self.model = create_r2plus1d(
            # --- Input clip ---
            input_channel=cfg.DATA.INPUT_CHANNEL_NUM[0],
            # --- Model ---
            model_depth=cfg.RESNET.DEPTH,
            model_num_class=cfg.MODEL.NUM_CLASSES,
            dropout_rate=cfg.MODEL.DROPOUT_RATE,
            # --- Normalization ---
            norm=get_norm(cfg),
            norm_eps=1e-5,
            norm_momentum=0.1,
            # --- Activation ---
            activation=partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU),
            # --- Stem ---
            stem_dim_out=cfg.RESNET.WIDTH_PER_GROUP,
            stem_conv_kernel_size=(1, 7, 7),
            stem_conv_stride=(1, 2, 2),
            # --- Stages ---
            stage_conv_a_kernel_size=(unit_3d,) * n_stages,
            stage_conv_b_kernel_size=(cube_3d,) * n_stages,
            stage_conv_b_num_groups=(1,) * n_stages,
            stage_conv_b_dilation=(unit_3d,) * n_stages,
            stage_spatial_stride=(2,) * n_stages,
            stage_temporal_stride=(1, 1, 2, 2),
            stage_bottleneck=(create_2plus1d_bottleneck_block,) * n_stages,
            # --- Head ---
            head_pool=nn.AvgPool3d,
            # NOTE(review): the divisors 4 and 32 presumably equal the overall
            # temporal and spatial downsampling of the strides configured
            # above -- confirm against create_r2plus1d before changing either.
            head_pool_kernel_size=(
                cfg.DATA.NUM_FRAMES // 4,
                cfg.DATA.TRAIN_CROP_SIZE // 32,
                cfg.DATA.TRAIN_CROP_SIZE // 32,
            ),
            head_activation=None,
            head_output_with_global_average=False,
        )

        self.post_act = get_head_act(cfg.MODEL.HEAD_ACT)
예제 #2
0
    def test_create_r2plus1d(self):
        """
        Build R(2+1)D models over a grid of input settings and check forwarding.

        For every combination of input channel count, clip length and crop
        size, a model is created and run on each tensor yielded by
        `TestR2plus1d._get_inputs`. Tensors whose channel dimension differs
        from the model's `input_channel` must raise `RuntimeError`; all others
        must produce an output of shape `(batch, 400)`.
        """
        stage_spatial_stride = (2, 2, 2, 2)
        stage_temporal_stride = (1, 1, 2, 2)

        # The strides are the same for every configuration, so the totals are
        # computed once. NOTE(review): the leading factor of 2 presumably
        # accounts for the spatial stride of the stem conv -- confirm.
        total_spatial_stride = 2 * np.prod(stage_spatial_stride)
        total_temporal_stride = np.prod(stage_temporal_stride)

        # First stage uses the plain bottleneck, the remaining three use the
        # (2+1)D bottleneck.
        bottlenecks = (create_bottleneck_block,) + (
            create_2plus1d_bottleneck_block,
        ) * 3

        settings = itertools.product((3, 2), (4, 8), (56, 64))
        for input_channel, input_clip_length, input_crop_size in settings:
            pooled_frames = input_clip_length // total_temporal_stride
            pooled_side = input_crop_size // total_spatial_stride

            model = create_r2plus1d(
                input_channel=input_channel,
                model_depth=50,
                model_num_class=400,
                dropout_rate=0.0,
                norm=nn.BatchNorm3d,
                activation=nn.ReLU,
                stem_dim_out=8,
                stem_conv_kernel_size=(1, 7, 7),
                stem_conv_stride=(1, 2, 2),
                stage_conv_b_kernel_size=((3, 3, 3),) * 4,
                stage_spatial_stride=stage_spatial_stride,
                stage_temporal_stride=stage_temporal_stride,
                stage_bottleneck=bottlenecks,
                head_pool=nn.AvgPool3d,
                head_pool_kernel_size=(pooled_frames, pooled_side, pooled_side),
                head_output_size=(1, 1, 1),
                head_activation=nn.Softmax,
            )

            # Test forwarding.
            candidates = TestR2plus1d._get_inputs(
                input_channel, input_clip_length, input_crop_size
            )
            for tensor in candidates:
                if tensor.shape[1] == input_channel:
                    output_shape = model(tensor).shape
                    output_shape_gt = (tensor.shape[0], 400)
                    failure_msg = (
                        "Output shape {} is different from expected shape {}"
                    ).format(output_shape, output_shape_gt)
                    self.assertEqual(
                        output_shape, output_shape_gt, failure_msg
                    )
                else:
                    # Channel mismatch: the forward pass is expected to fail.
                    with self.assertRaises(RuntimeError):
                        model(tensor)