def _construct_network(self, cfg):
    """
    Assemble the single pathway R(2+1)D network.

    The network returned by `create_r2plus1d` is stored on `self.model`, and
    the head activation chosen by `cfg.MODEL.HEAD_ACT` is stored on
    `self.post_act`.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    num_stages = 4
    unit_triple = (1, 1, 1)

    # Activation factory; in-place behaviour is taken from the config.
    relu_factory = partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU)

    # Head pooling window along (T, H, W). The divisor 4 equals the product
    # of the stage temporal strides (1 * 1 * 2 * 2); the divisor 32 equals
    # the stem spatial stride (2) times the product of the stage spatial
    # strides (2 ** 4).
    crop_size = cfg.DATA.TRAIN_CROP_SIZE
    head_pool_window = (
        cfg.DATA.NUM_FRAMES // 4,
        crop_size // 32,
        crop_size // 32,
    )

    self.model = create_r2plus1d(
        # Input clip configs.
        input_channel=cfg.DATA.INPUT_CHANNEL_NUM[0],
        # Model configs.
        model_depth=cfg.RESNET.DEPTH,
        model_num_class=cfg.MODEL.NUM_CLASSES,
        dropout_rate=cfg.MODEL.DROPOUT_RATE,
        # Normalization configs.
        norm=get_norm(cfg),
        norm_eps=1e-5,
        norm_momentum=0.1,
        # Activation configs.
        activation=relu_factory,
        # Stem configs.
        stem_dim_out=cfg.RESNET.WIDTH_PER_GROUP,
        stem_conv_kernel_size=(1, 7, 7),
        stem_conv_stride=(1, 2, 2),
        # Stage configs: every one of the four stages shares the same
        # kernel, group and dilation settings.
        stage_conv_a_kernel_size=(unit_triple,) * num_stages,
        stage_conv_b_kernel_size=((3, 3, 3),) * num_stages,
        stage_conv_b_num_groups=(1,) * num_stages,
        stage_conv_b_dilation=(unit_triple,) * num_stages,
        stage_spatial_stride=(2,) * num_stages,
        stage_temporal_stride=(1, 1, 2, 2),
        stage_bottleneck=(create_2plus1d_bottleneck_block,) * num_stages,
        # Head configs.
        head_pool=nn.AvgPool3d,
        head_pool_kernel_size=head_pool_window,
        head_activation=None,
        head_output_with_global_average=False,
    )

    self.post_act = get_head_act(cfg.MODEL.HEAD_ACT)
def test_create_r2plus1d(self):
    """
    Build small R(2+1)D models for a grid of input settings and check them.

    For every combination of input channel count, clip length and crop size,
    a model is created and run on the tensors from
    `TestR2plus1d._get_inputs`. Tensors whose second dimension differs from
    the model's `input_channel` must raise `RuntimeError`; all others must
    produce an output of shape `(batch, 400)`.
    """
    num_classes = 400

    # The strides do not depend on the loop variables, so compute the
    # overall reduction factors once.
    spatial_strides = (2, 2, 2, 2)
    temporal_strides = (1, 1, 2, 2)
    # Extra factor of 2 matches the spatial stride of the stem convolution.
    spatial_reduction = 2 * np.prod(spatial_strides)
    temporal_reduction = np.prod(temporal_strides)

    # First stage uses the plain bottleneck, the remaining three use the
    # (2+1)D bottleneck.
    block_builders = (create_bottleneck_block,) + (
        create_2plus1d_bottleneck_block,
    ) * 3

    settings = itertools.product((3, 2), (4, 8), (56, 64))
    for channels, clip_length, crop_size in settings:
        pooled_side = crop_size // spatial_reduction
        pool_kernel = (
            clip_length // temporal_reduction,
            pooled_side,
            pooled_side,
        )

        model = create_r2plus1d(
            input_channel=channels,
            model_depth=50,
            model_num_class=num_classes,
            dropout_rate=0.0,
            norm=nn.BatchNorm3d,
            activation=nn.ReLU,
            stem_dim_out=8,
            stem_conv_kernel_size=(1, 7, 7),
            stem_conv_stride=(1, 2, 2),
            stage_conv_b_kernel_size=((3, 3, 3),) * 4,
            stage_spatial_stride=spatial_strides,
            stage_temporal_stride=temporal_strides,
            stage_bottleneck=block_builders,
            head_pool=nn.AvgPool3d,
            head_pool_kernel_size=pool_kernel,
            head_output_size=(1, 1, 1),
            head_activation=nn.Softmax,
        )

        # Test forwarding.
        for clip in TestR2plus1d._get_inputs(channels, clip_length, crop_size):
            if clip.shape[1] == channels:
                actual_shape = model(clip).shape
                expected_shape = (clip.shape[0], num_classes)
                self.assertEqual(
                    actual_shape,
                    expected_shape,
                    "Output shape {} is different from expected shape {}".format(
                        actual_shape, expected_shape
                    ),
                )
            else:
                # Channel mismatch: the forward pass is expected to fail.
                with self.assertRaises(RuntimeError):
                    model(clip)