Example 1
    def _construct_network(self, cfg):
        """
        Builds an X3D model.

        Args:
            cfg (CfgNode): model building configs, details are in the
                comments of the config file.
        """

        # Params from configs.
        norm_module = get_norm(cfg)
        temp_kernel = _TEMPORAL_KERNEL_BASIS[cfg.MODEL.ARCH]

        self.model = create_x3d(
            # Input clip configs.
            input_channel=cfg.DATA.INPUT_CHANNEL_NUM[0],
            input_clip_length=cfg.DATA.NUM_FRAMES,
            input_crop_size=cfg.DATA.TRAIN_CROP_SIZE,
            # Model configs.
            model_num_class=cfg.MODEL.NUM_CLASSES,
            dropout_rate=cfg.MODEL.DROPOUT_RATE,
            width_factor=cfg.X3D.WIDTH_FACTOR,
            depth_factor=cfg.X3D.DEPTH_FACTOR,
            # Normalization configs.
            norm=norm_module,
            norm_eps=1e-5,
            norm_momentum=0.1,
            # Activation configs.
            activation=partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU),
            # Stem configs.
            stem_dim_in=cfg.X3D.DIM_C1,
            stem_conv_kernel_size=(temp_kernel[0][0][0], 3, 3),
            stem_conv_stride=(1, 2, 2),
            # Stage configs.
            stage_conv_kernel_size=(
                (temp_kernel[1][0][0], 3, 3),
                (temp_kernel[2][0][0], 3, 3),
                (temp_kernel[3][0][0], 3, 3),
                (temp_kernel[4][0][0], 3, 3),
            ),
            stage_spatial_stride=(2, 2, 2, 2),
            stage_temporal_stride=(1, 1, 1, 1),
            bottleneck=create_x3d_bottleneck_block,
            bottleneck_factor=cfg.X3D.BOTTLENECK_FACTOR,
            se_ratio=0.0625,
            inner_act=Swish,
            # Head configs.
            head_dim_out=cfg.X3D.DIM_C5,
            head_pool_act=partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU),
            head_bn_lin5_on=cfg.X3D.BN_LIN5,
            head_activation=None,
            head_output_with_global_average=False,
        )

        self.post_act = get_head_act(cfg.MODEL.HEAD_ACT)
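
For reference, a minimal sketch of the configuration fields this method reads, using plain Python namespaces rather than the actual SlowFast CfgNode. The numeric values follow the X3D-M setting and the literals used in the test of Example 4 below; ARCH, HEAD_ACT, INPUT_CHANNEL_NUM, and INPLACE_RELU are assumed values, and get_norm(cfg) would additionally read normalization fields that are omitted here.

from types import SimpleNamespace

# Hypothetical stand-in for the SlowFast config; only the fields read by
# _construct_network are listed. ARCH, HEAD_ACT, INPUT_CHANNEL_NUM, and
# INPLACE_RELU are assumptions; the rest mirrors Example 4's X3D-M values.
cfg = SimpleNamespace(
    MODEL=SimpleNamespace(ARCH="x3d", NUM_CLASSES=400, DROPOUT_RATE=0.5,
                          HEAD_ACT="softmax"),
    DATA=SimpleNamespace(INPUT_CHANNEL_NUM=[3], NUM_FRAMES=16,
                         TRAIN_CROP_SIZE=224),
    X3D=SimpleNamespace(WIDTH_FACTOR=2.0, DEPTH_FACTOR=2.2,
                        BOTTLENECK_FACTOR=2.25, DIM_C1=12, DIM_C5=2048,
                        BN_LIN5=False),
    RESNET=SimpleNamespace(INPLACE_RELU=True),
)
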
Example 2
def _x3d(
    pretrained: bool = False,
    progress: bool = True,
    checkpoint_path: str = "",
    **kwargs: Any,
) -> nn.Module:
    model = create_x3d(**kwargs)
    if pretrained and len(checkpoint_path) > 0:
        checkpoint = load_state_dict_from_url(checkpoint_path, progress=progress)
        state_dict = checkpoint["model_state"]
        model.load_state_dict(state_dict, strict=True)
    return model
Example 3
def _x3d(
    pretrained: bool = False,
    progress: bool = True,
    checkpoint_path: Optional[str] = None,
    **kwargs: Any,
) -> nn.Module:
    model = create_x3d(**kwargs)
    if pretrained and checkpoint_path is not None:
        # All models are loaded onto CPU by default
        checkpoint = load_state_dict_from_url(checkpoint_path,
                                              progress=progress,
                                              map_location="cpu")
        state_dict = checkpoint["model_state"]
        model.load_state_dict(state_dict)
    return model
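
A usage sketch for the _x3d helpers above (not taken from the library): it assumes create_x3d supplies defaults for every argument not passed explicitly, and the keyword values are illustrative only. Example 3 differs from Example 2 mainly by loading the checkpoint onto CPU via map_location and by treating checkpoint_path as Optional rather than as an empty-string sentinel.

# Hypothetical call; no real checkpoint URL is used here, so pretrained
# weights are disabled and the keyword arguments are forwarded to create_x3d.
model = _x3d(
    pretrained=False,
    input_clip_length=16,
    input_crop_size=224,
    model_num_class=400,
)
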
Example 4
    def test_create_x3d(self):
        """
        To test different versions of X3D, set the input to:
        X3D-XS: (4, 160, 2.0, 2.2, 2.25)
        X3D-S: (13, 160, 2.0, 2.2, 2.25)
        X3D-M: (16, 224, 2.0, 2.2, 2.25)
        X3D-L: (16, 312, 2.0, 5.0, 2.25)

        The tuple entries correspond, in order, to input_clip_length,
            input_crop_size, width_factor, depth_factor, and bottleneck_factor.
        """
        for (
                input_clip_length,
                input_crop_size,
                width_factor,
                depth_factor,
                bottleneck_factor,
        ) in [
            (4, 160, 2.0, 2.2, 2.25),
        ]:
            model = create_x3d(
                input_clip_length=input_clip_length,
                input_crop_size=input_crop_size,
                model_num_class=400,
                dropout_rate=0.5,
                width_factor=width_factor,
                depth_factor=depth_factor,
                norm=nn.BatchNorm3d,
                activation=nn.ReLU,
                stem_dim_in=12,
                stem_conv_kernel_size=(5, 3, 3),
                stem_conv_stride=(1, 2, 2),
                stage_conv_kernel_size=((3, 3, 3), ) * 4,
                stage_spatial_stride=(2, 2, 2, 2),
                stage_temporal_stride=(1, 1, 1, 1),
                bottleneck=create_x3d_bottleneck_block,
                bottleneck_factor=bottleneck_factor,
                se_ratio=0.0625,
                inner_act=Swish,
                head_dim_out=2048,
                head_pool_act=nn.ReLU,
                head_bn_lin5_on=False,
                head_activation=nn.Softmax,
            )

            # Test forwarding.
            for tensor in TestX3d._get_inputs(input_clip_length,
                                              input_crop_size):
                if tensor.shape[1] != 3:
                    with self.assertRaises(RuntimeError):
                        out = model(tensor)
                    continue

                out = model(tensor)

                output_shape = out.shape
                output_shape_gt = (tensor.shape[0], 400)

                self.assertEqual(
                    output_shape,
                    output_shape_gt,
                    "Output shape {} is different from expected shape {}".
                    format(output_shape, output_shape_gt),
                )
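
The test relies on a TestX3d._get_inputs helper that is not part of this excerpt. A minimal sketch of what such a generator might look like, assuming torch is imported and that the helper yields random clips in (batch, channel, time, height, width) layout, including a non-RGB channel count so the RuntimeError branch above is exercised:

    @staticmethod
    def _get_inputs(clip_length: int, crop_size: int):
        # Hypothetical input generator: yields random video clips shaped
        # (batch, channel, time, height, width); the single-channel clip is
        # expected to fail the 3-channel stem and hit the RuntimeError path.
        shapes = (
            (1, 3, clip_length, crop_size, crop_size),
            (2, 3, clip_length, crop_size, crop_size),
            (1, 1, clip_length, crop_size, crop_size),  # wrong channel count
        )
        for shape in shapes:
            yield torch.rand(shape)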