def _construct_network(self, cfg):
    """
    Build the X3D network and the activation applied after its head.

    The network returned by ``create_x3d`` is stored on ``self.model`` and
    the result of ``get_head_act(cfg.MODEL.HEAD_ACT)`` on ``self.post_act``.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    # Values derived from the config.
    norm_layer = get_norm(cfg)
    kernel_basis = _TEMPORAL_KERNEL_BASIS[cfg.MODEL.ARCH]

    # Keyword arguments are collected section by section and handed to
    # ``create_x3d`` in a single call below.
    x3d_kwargs = {
        # Input clip configs.
        "input_channel": cfg.DATA.INPUT_CHANNEL_NUM[0],
        "input_clip_length": cfg.DATA.NUM_FRAMES,
        "input_crop_size": cfg.DATA.TRAIN_CROP_SIZE,
        # Model configs.
        "model_num_class": cfg.MODEL.NUM_CLASSES,
        "dropout_rate": cfg.MODEL.DROPOUT_RATE,
        "width_factor": cfg.X3D.WIDTH_FACTOR,
        "depth_factor": cfg.X3D.DEPTH_FACTOR,
        # Normalization configs.
        "norm": norm_layer,
        "norm_eps": 1e-5,
        "norm_momentum": 0.1,
        # Activation configs.
        "activation": partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU),
        # Stem configs: entry 0 of the kernel basis gives the temporal size.
        "stem_dim_in": cfg.X3D.DIM_C1,
        "stem_conv_kernel_size": (kernel_basis[0][0][0], 3, 3),
        "stem_conv_stride": (1, 2, 2),
        # Stage configs: entries 1-4 of the kernel basis, one per stage.
        "stage_conv_kernel_size": tuple(
            (kernel_basis[stage_idx][0][0], 3, 3) for stage_idx in range(1, 5)
        ),
        "stage_spatial_stride": (2, 2, 2, 2),
        "stage_temporal_stride": (1, 1, 1, 1),
        "bottleneck": create_x3d_bottleneck_block,
        "bottleneck_factor": cfg.X3D.BOTTLENECK_FACTOR,
        "se_ratio": 0.0625,
        "inner_act": Swish,
        # Head configs.
        "head_dim_out": cfg.X3D.DIM_C5,
        "head_pool_act": partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU),
        "head_bn_lin5_on": cfg.X3D.BN_LIN5,
        "head_activation": None,
        "head_output_with_global_average": False,
    }
    self.model = create_x3d(**x3d_kwargs)

    self.post_act = get_head_act(cfg.MODEL.HEAD_ACT)
def _x3d(
    pretrained: bool = False,
    progress: bool = True,
    checkpoint_path: str = "",
    **kwargs: Any,
) -> nn.Module:
    """
    Create an X3D model and optionally load weights from a checkpoint URL.

    Args:
        pretrained: if True and ``checkpoint_path`` is non-empty, load the
            checkpoint's weights into the model.
        progress: forwarded to ``load_state_dict_from_url``.
        checkpoint_path: URL of the checkpoint. An empty string (the
            default) or ``None`` means no checkpoint is loaded.
        **kwargs: forwarded unchanged to ``create_x3d``.

    Returns:
        The model built by ``create_x3d``, with the checkpoint's
        ``"model_state"`` entry loaded when requested.
    """
    model = create_x3d(**kwargs)
    # Truthiness rather than ``len(...) > 0`` so that a ``None`` path is
    # treated like the empty default instead of raising ``TypeError``.
    if pretrained and checkpoint_path:
        # Map checkpoint tensors to CPU so that a checkpoint saved from GPU
        # tensors can still be deserialized on a host without that device.
        checkpoint = load_state_dict_from_url(
            checkpoint_path, progress=progress, map_location="cpu"
        )
        state_dict = checkpoint["model_state"]
        model.load_state_dict(state_dict, strict=True)
    return model
def _x3d(
    pretrained: bool = False,
    progress: bool = True,
    checkpoint_path: Optional[str] = None,
    **kwargs: Any,
) -> nn.Module:
    """
    Create an X3D model and optionally load weights from a checkpoint URL.

    Args:
        pretrained: if True and ``checkpoint_path`` is not None, load the
            checkpoint's weights into the model.
        progress: forwarded to ``load_state_dict_from_url``.
        checkpoint_path: URL of the checkpoint, or None to skip loading.
        **kwargs: forwarded unchanged to ``create_x3d``.

    Returns:
        The model built by ``create_x3d``, with the checkpoint's
        ``"model_state"`` entry loaded when requested.
    """
    net = create_x3d(**kwargs)

    # Nothing to load: hand back the freshly built model.
    if not pretrained or checkpoint_path is None:
        return net

    # Checkpoint tensors are mapped to CPU when loaded.
    ckpt = load_state_dict_from_url(
        checkpoint_path, progress=progress, map_location="cpu"
    )
    net.load_state_dict(ckpt["model_state"])
    return net
def test_create_x3d(self):
    """
    Build an X3D model and check its forward pass on the test inputs.

    To test different versions of X3D, set the input to:
    X3D-XS: (4, 160, 2.0, 2.2, 2.25)
    X3D-S: (13, 160, 2.0, 2.2, 2.25)
    X3D-M: (16, 224, 2.0, 2.2, 2.25)
    X3D-L: (16, 312, 2.0, 5.0, 2.25)

    Each of the parameters corresponds to input_clip_length, input_crop_size,
    width_factor, depth_factor and bottleneck_factor.
    """
    x3d_variants = [
        (4, 160, 2.0, 2.2, 2.25),
    ]

    for clip_len, crop_size, width, depth, bottleneck_ratio in x3d_variants:
        net = create_x3d(
            input_clip_length=clip_len,
            input_crop_size=crop_size,
            model_num_class=400,
            dropout_rate=0.5,
            width_factor=width,
            depth_factor=depth,
            norm=nn.BatchNorm3d,
            activation=nn.ReLU,
            stem_dim_in=12,
            stem_conv_kernel_size=(5, 3, 3),
            stem_conv_stride=(1, 2, 2),
            stage_conv_kernel_size=((3, 3, 3),) * 4,
            stage_spatial_stride=(2, 2, 2, 2),
            stage_temporal_stride=(1, 1, 1, 1),
            bottleneck=create_x3d_bottleneck_block,
            bottleneck_factor=bottleneck_ratio,
            se_ratio=0.0625,
            inner_act=Swish,
            head_dim_out=2048,
            head_pool_act=nn.ReLU,
            head_bn_lin5_on=False,
            head_activation=nn.Softmax,
        )

        # Test forwarding.
        for clip in TestX3d._get_inputs(clip_len, crop_size):
            if tensor_has_three_channels := (clip.shape[1] == 3):
                # Valid input: output must be (batch, 400).
                actual_shape = net(clip).shape
                expected_shape = (clip.shape[0], 400)
                self.assertEqual(
                    actual_shape,
                    expected_shape,
                    "Output shape {} is different from expected shape {}".format(
                        actual_shape, expected_shape
                    ),
                )
            else:
                # Second dimension is not 3: the forward pass must fail.
                with self.assertRaises(RuntimeError):
                    net(clip)