Example #1
    def __init__(self,
                 block,
                 layers,
                 channels,
                 classes=1000,
                 thumbnail=False,
                 caption_length=50,
                 **kwargs):
        super(ResNetV1, self).__init__(**kwargs)
        assert len(layers) == len(channels) - 1
        with self.name_scope():
            self.caption_length = caption_length
            self.features = nn.HybridSequential(prefix='')
            if thumbnail:
                # small-input stem: a single 3x3 conv with no downsampling
                self.features.add(_conv3x3(channels[0], 1, 0))
            else:
                self.features.add(
                    nn.Conv3D(channels[0], 7, 2, 3, use_bias=False))
                self.features.add(nn.BatchNorm())
                self.features.add(nn.Activation('relu'))
                self.features.add(nn.MaxPool3D(3, 2, 1))

            for i, num_layer in enumerate(layers):
                stride = 1 if i == 0 else 2
                self.features.add(
                    self._make_layer(block,
                                     num_layer,
                                     channels[i + 1],
                                     stride,
                                     i + 1,
                                     in_channels=channels[i]))
            self.features.add(nn.GlobalAvgPool3D())
            # The original classification head (Dense(classes)) is replaced
            # here by a caption-grid output of caption_length * caption_length units.
            self.output = nn.Dense(caption_length * caption_length)
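
A quick configuration sketch (not from the original file): the assert above encodes the convention that channels[0] is the stem width and channels[1:] are the per-stage widths, e.g. a ResNet-18-style setup:

layers = [2, 2, 2, 2]                # residual blocks per stage
channels = [64, 64, 128, 256, 512]   # stem width + one width per stage
assert len(layers) == len(channels) - 1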
Example #2
File: r21d.py Project: nrjcs/Tennis
    def __init__(self, block, layers, channels, classes=400, **kwargs):
        super(R21DV1, self).__init__(**kwargs)
        assert len(layers) == len(channels) - 1
        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            self.features.add(
                _conv21d(channels[0], [3, 7, 7],
                         strides=[1, 2, 2],
                         padding=[1, 3, 3],
                         mid_channels=45,
                         prefix='init_'))
            self.features.add(
                nn.BatchNorm(epsilon=1e-3,
                             momentum=0.9,
                             use_global_stats=True,  # frozen running statistics
                             prefix='init_'))
            self.features.add(nn.LeakyReLU(0.0))  # slope 0.0 is a plain ReLU

            for i, num_layer in enumerate(layers):
                stride = 1 if i == 0 else 2
                self.features.add(
                    self._make_layer(block,
                                     num_layer,
                                     channels[i + 1],
                                     stride,
                                     i + 1,
                                     in_channels=channels[i]))
            self.avg = nn.GlobalAvgPool3D()

            self.dense = nn.Dense(classes, in_units=channels[-1])
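
A parameter-count sketch for the stem above, assuming _conv21d factorizes the [3, 7, 7] kernel into a [1, 7, 7] spatial conv onto mid_channels=45 followed by a [3, 1, 1] temporal conv (the R(2+1)D design); the factorization roughly halves the stem's parameters while adding an extra nonlinearity between the two convolutions:

in_c, mid_c, out_c = 3, 45, 64
factorized = in_c * mid_c * (1 * 7 * 7) + mid_c * out_c * (3 * 1 * 1)  # 6615 + 8640 = 15255
full_3d = in_c * out_c * (3 * 7 * 7)                                   # 28224
assert factorized < full_3d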
Example #3
    def __init__(self, nclass, block, layers, dropout_ratio=0.5,
                 num_segments=1, num_crop=1, feat_ext=False, use_lateral=False,
                 init_std=0.001, ctx=None, partial_bn=False,
                 norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
        super(R2Plus1D, self).__init__()
        self.partial_bn = partial_bn
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.feat_ext = feat_ext
        self.use_lateral = use_lateral
        self.inplanes = 64
        self.feat_dim = 512 * block.expansion

        with self.name_scope():
            self.conv1 = nn.Conv3D(in_channels=3, channels=45, kernel_size=(1, 7, 7),
                                   strides=(1, 2, 2), padding=(0, 3, 3), use_bias=False)
            self.bn1 = norm_layer(in_channels=45, **({} if norm_kwargs is None else norm_kwargs))
            self.relu = nn.Activation('relu')
            self.conv2 = conv3x1x1(in_planes=45, out_planes=64)
            self.bn2 = norm_layer(in_channels=64, **({} if norm_kwargs is None else norm_kwargs))

            if self.partial_bn:
                # Partial BN: freeze running statistics for every BatchNorm
                # created after this point (bn1/bn2 above keep batch statistics).
                if norm_kwargs is None:
                    norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True

            self.layer1 = self._make_res_layer(block=block,
                                               planes=64,
                                               blocks=layers[0],
                                               layer_name='layer1_')
            self.layer2 = self._make_res_layer(block=block,
                                               planes=128,
                                               blocks=layers[1],
                                               stride=2,
                                               layer_name='layer2_')
            self.layer3 = self._make_res_layer(block=block,
                                               planes=256,
                                               blocks=layers[2],
                                               stride=2,
                                               layer_name='layer3_')
            self.layer4 = self._make_res_layer(block=block,
                                               planes=512,
                                               blocks=layers[3],
                                               stride=2,
                                               layer_name='layer4_')

            self.avgpool = nn.GlobalAvgPool3D()
            self.dropout = nn.Dropout(rate=self.dropout_ratio)
            self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                               weight_initializer=init.Normal(sigma=self.init_std))
Example #4
def test_pool():
    layers1d = [
        nn.MaxPool1D(),
        nn.MaxPool1D(3),
        nn.MaxPool1D(3, 2),
        nn.AvgPool1D(),
        nn.AvgPool1D(count_include_pad=False),
        nn.GlobalAvgPool1D(),
        ]
    for layer in layers1d:
        check_layer_forward(layer, (1, 2, 10))


    layers2d = [
        nn.MaxPool2D(),
        nn.MaxPool2D((3, 3)),
        nn.MaxPool2D(3, 2),
        nn.AvgPool2D(),
        nn.AvgPool2D(count_include_pad=False),
        nn.GlobalAvgPool2D(),
        ]
    for layer in layers2d:
        check_layer_forward(layer, (1, 2, 10, 10))

    layers3d = [
        nn.MaxPool3D(),
        nn.MaxPool3D((3, 3, 3)),
        nn.MaxPool3D(3, 2),
        nn.AvgPool3D(),
        nn.AvgPool3D(count_include_pad=False),
        nn.GlobalAvgPool3D(),
        ]
    for layer in layers3d:
        check_layer_forward(layer, (1, 2, 10, 10, 10))

    # test ceil_mode
    x = mx.nd.zeros((2, 2, 10, 10))

    layer = nn.MaxPool2D(3, ceil_mode=False)
    layer.collect_params().initialize()
    assert layer(x).shape == (2, 2, 3, 3)

    layer = nn.MaxPool2D(3, ceil_mode=True)
    layer.collect_params().initialize()
    assert layer(x).shape == (2, 2, 4, 4)
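
For reference, both assertions follow from the standard pooling output-size formula (with pool_size 3 and no explicit strides, Gluon defaults the stride to the pool size):

import math
n, k, s, pad = 10, 3, 3, 0                    # one spatial axis of x above
print(math.floor((n + 2 * pad - k) / s) + 1)  # 3 -> shape (2, 2, 3, 3)
print(math.ceil((n + 2 * pad - k) / s) + 1)   # 4 -> shape (2, 2, 4, 4)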
Example #5
    def __init__(self,
                 nclass,
                 depth,
                 num_stages=4,
                 pretrained_base=True,
                 num_segments=1,
                 spatial_strides=(1, 2, 2, 2),
                 temporal_strides=(1, 1, 1, 1),
                 dilations=(1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 conv1_kernel_t=5,
                 conv1_stride_t=2,
                 pool1_kernel_t=1,
                 pool1_stride_t=2,
                 inflate_freq=(1, 1, 1, 1),
                 inflate_stride=(1, 1, 1, 1),
                 inflate_style='3x1x1',
                 nonlocal_stages=(-1, ),
                 nonlocal_freq=(0, 1, 1, 0),
                 nonlocal_cfg=None,
                 bn_eval=True,
                 bn_frozen=False,
                 partial_bn=False,
                 frozen_stages=-1,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 ctx=None,
                 **kwargs):
        super(I3D_ResNetV1, self).__init__()

        if depth not in self.arch_settings:
            raise KeyError('invalid depth {} for resnet'.format(depth))

        self.nclass = nclass
        self.depth = depth
        self.num_stages = num_stages
        self.pretrained_base = pretrained_base
        self.num_segments = num_segments
        self.spatial_strides = spatial_strides
        self.temporal_strides = temporal_strides
        self.dilations = dilations
        assert len(spatial_strides) == len(temporal_strides) == len(dilations) == num_stages
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        if isinstance(inflate_freq, int):
            inflate_freq = (inflate_freq, ) * num_stages
        self.inflate_freqs = inflate_freq
        self.inflate_style = inflate_style
        self.nonlocal_stages = nonlocal_stages
        if isinstance(nonlocal_freq, int):
            nonlocal_freq = (nonlocal_freq, ) * num_stages
        self.nonlocal_freqs = nonlocal_freq
        self.nonlocal_cfg = nonlocal_cfg
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.partial_bn = partial_bn
        self.frozen_stages = frozen_stages
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.inplanes = 64

        self.first_stage = nn.HybridSequential(prefix='')
        self.first_stage.add(
            nn.Conv3D(in_channels=3,
                      channels=64,
                      kernel_size=(conv1_kernel_t, 7, 7),
                      strides=(conv1_stride_t, 2, 2),
                      padding=((conv1_kernel_t - 1) // 2, 3, 3),
                      use_bias=False))
        self.first_stage.add(
            norm_layer(in_channels=64,
                       **({} if norm_kwargs is None else norm_kwargs)))
        self.first_stage.add(nn.Activation('relu'))
        self.first_stage.add(
            nn.MaxPool3D(pool_size=(pool1_kernel_t, 3, 3),
                         strides=(pool1_stride_t, 2, 2),
                         padding=(pool1_kernel_t // 2, 1, 1)))

        self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1),
                                  strides=(2, 1, 1),
                                  padding=(0, 0, 0))

        self.res_layers = nn.HybridSequential(prefix='')
        for i, num_blocks in enumerate(self.stage_blocks):
            spatial_stride = spatial_strides[i]
            temporal_stride = temporal_strides[i]
            dilation = dilations[i]
            planes = 64 * 2**i
            layer_name = 'layer{}_'.format(i + 1)

            res_layer = make_res_layer(self.block,
                                       self.inplanes,
                                       planes,
                                       num_blocks,
                                       spatial_stride=spatial_stride,
                                       temporal_stride=temporal_stride,
                                       dilation=dilation,
                                       inflate_freq=self.inflate_freqs[i],
                                       inflate_style=self.inflate_style,
                                       nonlocal_freq=self.nonlocal_freqs[i],
                                       nonlocal_cfg=self.nonlocal_cfg
                                       if i in self.nonlocal_stages else None,
                                       norm_layer=norm_layer,
                                       norm_kwargs=norm_kwargs,
                                       layer_name=layer_name)
            self.inplanes = planes * self.block.expansion
            self.res_layers.add(res_layer)

        self.feat_dim = self.block.expansion * 64 * 2**(
            len(self.stage_blocks) - 1)

        # We use ``GlobalAvgPool3D`` here for simplicity. Otherwise the input size must be fixed.
        # You can also use ``AvgPool3D`` and specify the arguments on your own, e.g.
        # self.st_avg = nn.AvgPool3D(pool_size=(4, 7, 7), strides=1, padding=0)
        # ``AvgPool3D`` is 10% faster, but ``GlobalAvgPool3D`` makes the code cleaner.
        self.st_avg = nn.GlobalAvgPool3D()

        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.fc = nn.Dense(in_units=self.feat_dim,
                           units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.fc)

        self.init_weights()
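
Two pieces of constructor arithmetic, checked as a standalone sketch with the default arguments above (a bottleneck-style block with expansion 4 is assumed):

conv1_kernel_t = 5
assert (conv1_kernel_t - 1) // 2 == 2        # 'same'-style temporal padding for the stem
expansion, num_stages = 4, 4
feat_dim = expansion * 64 * 2 ** (num_stages - 1)
assert feat_dim == 2048                      # in_units of the final Dense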
Example #6
    def __init__(self,
                 nclass,
                 block=Bottleneck,
                 layers=None,  # per-stage block counts, e.g. [3, 4, 6, 3]
                 pretrained=False,
                 pretrained_base=False,
                 num_segments=1,
                 num_crop=1,
                 bn_eval=True,
                 bn_frozen=False,
                 partial_bn=False,
                 frozen_stages=-1,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 alpha=8,
                 beta_inv=8,
                 fusion_conv_channel_ratio=2,
                 fusion_kernel_size=5,
                 width_per_group=64,
                 num_groups=1,
                 slow_temporal_stride=16,
                 fast_temporal_stride=2,
                 slow_frames=4,
                 fast_frames=32,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 ctx=None,
                 **kwargs):
        super(SlowFast, self).__init__()
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.alpha = alpha
        self.beta_inv = beta_inv
        self.fusion_conv_channel_ratio = fusion_conv_channel_ratio
        self.fusion_kernel_size = fusion_kernel_size
        self.width_per_group = width_per_group
        self.num_groups = num_groups
        self.dim_inner = self.num_groups * self.width_per_group
        self.out_dim_ratio = self.beta_inv // self.fusion_conv_channel_ratio
        self.slow_temporal_stride = slow_temporal_stride
        self.fast_temporal_stride = fast_temporal_stride
        self.slow_frames = slow_frames
        self.fast_frames = fast_frames

        with self.name_scope():
            # build fast pathway (the HybridSequential is used only for its
            # name scope; the layers themselves are attached to self)
            fast = nn.HybridSequential(prefix='fast_')
            with fast.name_scope():
                self.fast_conv1 = nn.Conv3D(in_channels=3, channels=self.width_per_group // self.beta_inv,
                                            kernel_size=(5, 7, 7), strides=(1, 2, 2), padding=(2, 3, 3), use_bias=False)
                self.fast_bn1 = norm_layer(in_channels=self.width_per_group // self.beta_inv,
                                           **({} if norm_kwargs is None else norm_kwargs))
                self.fast_relu = nn.Activation('relu')
                self.fast_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            self.fast_res2 = self._make_layer_fast(inplanes=self.width_per_group // self.beta_inv,
                                                   planes=self.dim_inner // self.beta_inv,
                                                   num_blocks=layers[0],
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res2_')
            self.fast_res3 = self._make_layer_fast(inplanes=self.width_per_group * 4 // self.beta_inv,
                                                   planes=self.dim_inner * 2 // self.beta_inv,
                                                   num_blocks=layers[1],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res3_')
            self.fast_res4 = self._make_layer_fast(inplanes=self.width_per_group * 8 // self.beta_inv,
                                                   planes=self.dim_inner * 4 // self.beta_inv,
                                                   num_blocks=layers[2],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res4_')
            self.fast_res5 = self._make_layer_fast(inplanes=self.width_per_group * 16 // self.beta_inv,
                                                   planes=self.dim_inner * 8 // self.beta_inv,
                                                   num_blocks=layers[3],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res5_')

            # build lateral connections
            self.lateral_p1 = nn.HybridSequential(prefix='lateral_p1_')
            with self.lateral_p1.name_scope():
                self.lateral_p1.add(nn.Conv3D(in_channels=self.width_per_group // self.beta_inv,
                                              channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                              kernel_size=(self.fusion_kernel_size, 1, 1),
                                              strides=(self.alpha, 1, 1),
                                              padding=(self.fusion_kernel_size // 2, 0, 0),
                                              use_bias=False))
                self.lateral_p1.add(norm_layer(in_channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                               **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_p1.add(nn.Activation('relu'))

            self.lateral_res2 = nn.HybridSequential(prefix='lateral_res2_')
            with self.lateral_res2.name_scope():
                self.lateral_res2.add(nn.Conv3D(in_channels=self.width_per_group * 4 // self.beta_inv,
                                                channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                kernel_size=(self.fusion_kernel_size, 1, 1),
                                                strides=(self.alpha, 1, 1),
                                                padding=(self.fusion_kernel_size // 2, 0, 0),
                                                use_bias=False))
                self.lateral_res2.add(norm_layer(in_channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                 **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_res2.add(nn.Activation('relu'))

            self.lateral_res3 = nn.HybridSequential(prefix='lateral_res3_')
            with self.lateral_res3.name_scope():
                self.lateral_res3.add(nn.Conv3D(in_channels=self.width_per_group * 8 // self.beta_inv,
                                                channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                kernel_size=(self.fusion_kernel_size, 1, 1),
                                                strides=(self.alpha, 1, 1),
                                                padding=(self.fusion_kernel_size // 2, 0, 0),
                                                use_bias=False))
                self.lateral_res3.add(norm_layer(in_channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                 **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_res3.add(nn.Activation('relu'))

            self.lateral_res4 = nn.HybridSequential(prefix='lateral_res4_')
            with self.lateral_res4.name_scope():
                self.lateral_res4.add(nn.Conv3D(in_channels=self.width_per_group * 16 // self.beta_inv,
                                                channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                kernel_size=(self.fusion_kernel_size, 1, 1),
                                                strides=(self.alpha, 1, 1),
                                                padding=(self.fusion_kernel_size // 2, 0, 0),
                                                use_bias=False))
                self.lateral_res4.add(norm_layer(in_channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                 **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_res4.add(nn.Activation('relu'))

            # build slow pathway (again, the container only provides the
            # parameter name scope)
            slow = nn.HybridSequential(prefix='slow_')
            with slow.name_scope():
                self.slow_conv1 = nn.Conv3D(in_channels=3, channels=self.width_per_group,
                                            kernel_size=(1, 7, 7), strides=(1, 2, 2), padding=(0, 3, 3), use_bias=False)
                self.slow_bn1 = norm_layer(in_channels=self.width_per_group,
                                           **({} if norm_kwargs is None else norm_kwargs))
                self.slow_relu = nn.Activation('relu')
                self.slow_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            self.slow_res2 = self._make_layer_slow(inplanes=self.width_per_group + self.width_per_group // self.out_dim_ratio,
                                                   planes=self.dim_inner,
                                                   num_blocks=layers[0],
                                                   head_conv=1,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res2_')
            self.slow_res3 = self._make_layer_slow(inplanes=self.width_per_group * 4 + self.width_per_group * 4 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 2,
                                                   num_blocks=layers[1],
                                                   strides=2,
                                                   head_conv=1,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res3_')
            self.slow_res4 = self._make_layer_slow(inplanes=self.width_per_group * 8 + self.width_per_group * 8 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 4,
                                                   num_blocks=layers[2],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res4_')
            self.slow_res5 = self._make_layer_slow(inplanes=self.width_per_group * 16 + self.width_per_group * 16 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 8,
                                                   num_blocks=layers[3],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res5_')

            # build classifier
            self.avg = nn.GlobalAvgPool3D()
            self.dp = nn.Dropout(rate=self.dropout_ratio)
            self.feat_dim = self.width_per_group * 32 // self.beta_inv + self.width_per_group * 32
            self.fc = nn.Dense(in_units=self.feat_dim, units=nclass, weight_initializer=init.Normal(sigma=self.init_std), use_bias=True)

            self.initialize(init.MSRAPrelu(), ctx=ctx)
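
A sketch of the classifier-width arithmetic with the defaults above (width_per_group=64, beta_inv=8): the fast pathway ends at width_per_group * 32 // beta_inv channels, the slow pathway at width_per_group * 32, and the head sees their concatenation:

width_per_group, beta_inv = 64, 8
fast_out = width_per_group * 32 // beta_inv  # 256
slow_out = width_per_group * 32              # 2048
assert fast_out + slow_out == 2304           # in_units of the final Dense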
Example #7
    def __init__(self, nclass=1000, norm_layer=BatchNorm, num_segments=1,
                 norm_kwargs=None, partial_bn=False, pretrained_base=True,
                 dropout_ratio=0.5, init_std=0.01,
                 ctx=None, **kwargs):
        super(I3D_InceptionV1, self).__init__(**kwargs)

        self.num_segments = num_segments
        self.feat_dim = 1024
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')

            self.features.add(_make_basic_conv(in_channels=3, channels=64, kernel_size=7, strides=2, padding=3, norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1)))

            if partial_bn:
                # Partial BN: freeze running statistics for every BatchNorm
                # created after this point (the stem conv above is unaffected).
                if norm_kwargs is None:
                    norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True

            self.features.add(_make_basic_conv(in_channels=64, channels=64, kernel_size=1, norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(_make_basic_conv(in_channels=64, channels=192, kernel_size=3, padding=(1, 1, 1), norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1)))

            self.features.add(_make_Mixed_3a(192, 32, 'Mixed_3a_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_3b(256, 64, 'Mixed_3b_', norm_layer, norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=3, strides=(2, 2, 2), padding=(1, 1, 1)))

            self.features.add(_make_Mixed_4a(480, 64, 'Mixed_4a_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_4b(512, 64, 'Mixed_4b_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_4c(512, 64, 'Mixed_4c_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_4d(512, 64, 'Mixed_4d_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_4e(528, 128, 'Mixed_4e_', norm_layer, norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=2, strides=(2, 2, 2)))

            self.features.add(_make_Mixed_5a(832, 128, 'Mixed_5a_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_5b(832, 128, 'Mixed_5b_', norm_layer, norm_kwargs))
            self.features.add(nn.GlobalAvgPool3D())

            self.head = nn.HybridSequential(prefix='')
            self.head.add(nn.Dropout(rate=self.dropout_ratio))
            self.output = nn.Dense(units=nclass, in_units=self.feat_dim, weight_initializer=init.Normal(sigma=self.init_std))
            self.head.add(self.output)

            self.features.initialize(ctx=ctx)
            self.head.initialize(ctx=ctx)

            if pretrained_base:
                inceptionv1_2d = googlenet(pretrained=True)
                weights2d = inceptionv1_2d.collect_params()
                weights3d = self.collect_params()
                assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be the same.'

                dict2d = {}
                for key_id, key_name in enumerate(weights2d.keys()):
                    dict2d[key_id] = key_name

                dict3d = {}
                for key_id, key_name in enumerate(weights3d.keys()):
                    dict3d[key_id] = key_name

                # Parameter dicts iterate in creation order, so pairing by
                # position maps each 2-D parameter to its 3-D counterpart.
                dict_transform = {}
                for key_id, key_name in dict3d.items():
                    dict_transform[dict2d[key_id]] = key_name

                cnt = 0
                for key2d, key3d in dict_transform.items():
                    if 'conv' in key3d:
                        # Inflate: tile the 2-D kernel along a new temporal
                        # axis, then rescale by the temporal length.
                        temporal_dim = weights3d[key3d].shape[2]
                        temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                        inflated_2d = nd.broadcast_to(temporal_2d, shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                        assert inflated_2d.shape == weights3d[key3d].shape, 'the shapes of %s and %s do not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(inflated_2d)
                        cnt += 1
                        print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                    if 'batchnorm' in key3d:
                        assert weights2d[key2d].shape == weights3d[key3d].shape, 'the shapes of %s and %s do not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(weights2d[key2d].data())
                        cnt += 1
                        print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                    if 'dense' in key3d:
                        cnt += 1
                        print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

                assert cnt == len(weights2d.keys()), 'Not all parameters have been ported, check the initialization.'
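
The loop above is the I3D-style weight inflation: each 2-D conv kernel is tiled along a new temporal axis and divided by the temporal length, so on a temporally constant input the inflated 3-D network initially reproduces the 2-D network's activations. A minimal standalone sketch of the same operation (hypothetical shapes):

import mxnet as mx

w2d = mx.nd.random.randn(64, 3, 7, 7)                # (out, in, kH, kW)
t = 5
w3d = mx.nd.broadcast_to(w2d.expand_dims(axis=2),
                         shape=(0, 0, t, 0, 0)) / t  # zeros keep input sizes
# Summing the inflated kernel over time recovers the 2-D kernel.
assert mx.nd.abs(w3d.sum(axis=2) - w2d).max().asscalar() < 1e-5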
Example #8
    def __init__(self, nclass=1000, pretrained=False, pretrained_base=True,
                 num_segments=1, num_crop=1, feat_ext=False,
                 dropout_ratio=0.5, init_std=0.01, partial_bn=False,
                 ctx=None, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
        super(I3D_InceptionV3, self).__init__(**kwargs)

        self.num_segments = num_segments
        self.num_crop = num_crop
        self.feat_dim = 2048
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.feat_ext = feat_ext

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            self.features.add(_make_basic_conv(in_channels=3, channels=32, kernel_size=3, strides=2, padding=(1, 0, 0),
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            if partial_bn:
                # Partial BN: freeze running statistics for every BatchNorm
                # created after this point (the first conv's BN keeps batch stats).
                if norm_kwargs is None:
                    norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True

            self.features.add(_make_basic_conv(in_channels=32, channels=32, kernel_size=3, padding=(1, 0, 0),
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(_make_basic_conv(in_channels=32, channels=64, kernel_size=3, padding=1,
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))
            self.features.add(_make_basic_conv(in_channels=64, channels=80, kernel_size=1,
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(_make_basic_conv(in_channels=80, channels=192, kernel_size=3, padding=(1, 0, 0),
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))
            self.features.add(_make_A(192, 32, 'A1_', norm_layer, norm_kwargs))
            self.features.add(_make_A(256, 64, 'A2_', norm_layer, norm_kwargs))
            self.features.add(_make_A(288, 64, 'A3_', norm_layer, norm_kwargs))
            self.features.add(_make_B('B_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 128, 'C1_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 160, 'C2_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 160, 'C3_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 192, 'C4_', norm_layer, norm_kwargs))
            self.features.add(_make_D('D_', norm_layer, norm_kwargs))
            self.features.add(_make_E(1280, 'E1_', norm_layer, norm_kwargs))
            self.features.add(_make_E(2048, 'E2_', norm_layer, norm_kwargs))
            self.features.add(nn.GlobalAvgPool3D())

            self.head = nn.HybridSequential(prefix='')
            self.head.add(nn.Dropout(rate=self.dropout_ratio))
            self.output = nn.Dense(units=nclass, in_units=self.feat_dim, weight_initializer=init.Normal(sigma=self.init_std))
            self.head.add(self.output)

            self.features.initialize(ctx=ctx)
            self.head.initialize(ctx=ctx)

            if pretrained_base and not pretrained:
                inceptionv3_2d = inception_v3(pretrained=True)
                weights2d = inceptionv3_2d.collect_params()
                weights3d = self.collect_params()
                assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be the same.'

                dict2d = {}
                for key_id, key_name in enumerate(weights2d.keys()):
                    dict2d[key_id] = key_name

                dict3d = {}
                for key_id, key_name in enumerate(weights3d.keys()):
                    dict3d[key_id] = key_name

                dict_transform = {}
                for key_id, key_name in dict3d.items():
                    dict_transform[dict2d[key_id]] = key_name

                cnt = 0
                for key2d, key3d in dict_transform.items():
                    if 'conv' in key3d:
                        temporal_dim = weights3d[key3d].shape[2]
                        temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                        inflated_2d = nd.broadcast_to(temporal_2d, shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                        assert inflated_2d.shape == weights3d[key3d].shape, 'the shapes of %s and %s do not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(inflated_2d)
                        cnt += 1
                        print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                    if 'batchnorm' in key3d:
                        assert weights2d[key2d].shape == weights3d[key3d].shape, 'the shapes of %s and %s do not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(weights2d[key2d].data())
                        cnt += 1
                        print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                    if 'dense' in key3d:
                        cnt += 1
                        print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

                assert cnt == len(weights2d.keys()), 'Not all parameters have been ported, check the initialization.'