Beispiel #1
0
    def __init__(self,
                 nclass,
                 pretrained_base=True,
                 input_channel=3,
                 partial_bn=True,
                 dropout_ratio=0.8,
                 init_std=0.001,
                 feat_dim=2048,
                 num_segments=1,
                 num_crop=1,
                 **kwargs):
        """Build a 2D InceptionV3 backbone for action recognition.

        Parameters
        ----------
        nclass : int
            Number of output classes of the final Dense classifier.
        pretrained_base : bool, default True
            Load ImageNet-pretrained weights for the InceptionV3 backbone.
        input_channel : int, default 3
            Number of input channels. When != 3, the stem conv is rebuilt
            for the new channel count and randomly initialized while the
            remaining (pretrained) layers are reused.
        partial_bn : bool, default True
            Forwarded to ``inception_v3`` (freezes BatchNorm statistics
            except for the first layer — see that function's docs).
        dropout_ratio : float, default 0.8
            Dropout rate applied to the backbone's Dropout layers.
        init_std : float, default 0.001
            Std of the Normal initializer for the classifier weights.
        feat_dim : int, default 2048
            Feature dimension the backbone produces (InceptionV3 = 2048).
        num_segments, num_crop : int, default 1
            Test-time segment/crop counts kept for consumers of the model.
        **kwargs
            Extra arguments forwarded to ``inception_v3`` (may include
            ``norm_layer``, honored below when rebuilding the stem conv).
        """
        super(ActionRecInceptionV3, self).__init__()
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.feat_dim = feat_dim

        pretrained_model = inception_v3(pretrained=pretrained_base,
                                        partial_bn=partial_bn,
                                        **kwargs)
        inception_features = pretrained_model.features
        if input_channel == 3:
            # Standard RGB input: reuse the pretrained feature extractor as-is.
            self.features = inception_features
        else:
            # Non-RGB input: rebuild the stem conv for `input_channel`
            # channels, then append the remaining pretrained layers.
            self.features = nn.HybridSequential(prefix='')
            with pretrained_model.name_scope():
                # BUGFIX: the original checked `'norm_layer' not in dir()`,
                # which inspects the *local* namespace where `norm_layer` is
                # never bound — the condition was always True, so a
                # `norm_layer` passed via **kwargs was silently ignored and
                # the else-branch was unreachable. Read it from kwargs
                # instead, keeping the original nn.BatchNorm default.
                norm_layer = kwargs.get('norm_layer')
                if norm_layer is None:
                    norm_layer = nn.BatchNorm
                self.features.add(
                    _make_basic_conv(
                        in_channels=input_channel,
                        channels=32,
                        kernel_size=3,
                        strides=2,
                        norm_layer=norm_layer,
                        norm_kwargs=None,
                        weight_initializer=mx.init.Xavier(magnitude=2)))
                # Only the rebuilt stem conv needs fresh initialization; the
                # rest of the backbone keeps its pretrained parameters.
                self.features[0].initialize()
                for layer in inception_features[1:]:
                    self.features.add(layer)

        def update_dropout_ratio(block):
            # Override the backbone's built-in dropout rate with ours.
            # NOTE(review): reaches into the private `_rate` attribute of
            # gluon's Dropout — verify against the installed MXNet version.
            if isinstance(block, nn.basic_layers.Dropout):
                block._rate = self.dropout_ratio

        self.apply(update_dropout_ratio)
        self.output = nn.Dense(
            units=nclass,
            in_units=self.feat_dim,
            weight_initializer=init.Normal(sigma=self.init_std))
        self.output.initialize()
Beispiel #2
0
    def __init__(self,
                 nclass=1000,
                 pretrained=False,
                 pretrained_base=True,
                 num_segments=1,
                 num_crop=1,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 partial_bn=False,
                 ctx=None,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 **kwargs):
        """Build an Inflated-3D (I3D) InceptionV3 network.

        The 2D InceptionV3 topology is rebuilt with 3D convolutions and
        poolings. When ``pretrained_base`` is True (and ``pretrained`` is
        False), ImageNet 2D weights are "inflated" along the temporal axis
        to initialize the 3D kernels.

        Parameters
        ----------
        nclass : int, default 1000
            Number of output classes of the final Dense classifier.
        pretrained : bool, default False
            Whether full 3D weights will be loaded by the caller; if True,
            the 2D-inflation step below is skipped.
        pretrained_base : bool, default True
            Inflate ImageNet-pretrained 2D InceptionV3 weights into the 3D
            kernels (only when ``pretrained`` is False).
        num_segments, num_crop : int, default 1
            Test-time segment/crop counts kept for consumers of the model.
        dropout_ratio : float, default 0.5
            Dropout rate applied before the classifier.
        init_std : float, default 0.01
            Std of the Normal initializer for the classifier weights.
        partial_bn : bool, default False
            Freeze BatchNorm running statistics (``use_global_stats``) for
            every block added AFTER the first stem conv; the first block's
            BatchNorm keeps updating its statistics.
        ctx : Context or list of Context, optional
            MXNet context(s) on which parameters are initialized.
        norm_layer : gluon block, default BatchNorm
            Normalization layer used throughout the network.
        norm_kwargs : dict or None
            Extra keyword arguments forwarded to ``norm_layer``.
            NOTE(review): mutated in place when ``partial_bn`` is set — the
            caller's dict gains ``use_global_stats=True``.
        """
        super(I3D_InceptionV3, self).__init__(**kwargs)

        self.num_segments = num_segments
        self.num_crop = num_crop
        # InceptionV3 produces 2048-dim features after global pooling.
        self.feat_dim = 2048
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        with self.name_scope():
            # --- Stem: conv/pool layers mirroring 2D InceptionV3, with 3D
            # kernels. Spatial strides/padding follow the 2D model; temporal
            # padding of 1 keeps the temporal extent through kernel_size=3.
            self.features = nn.HybridSequential(prefix='')
            self.features.add(
                _make_basic_conv(in_channels=3,
                                 channels=32,
                                 kernel_size=3,
                                 strides=2,
                                 padding=(1, 0, 0),
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            # Deliberately set AFTER the first conv so that only subsequent
            # BatchNorm layers are frozen (partial BN): the first BN still
            # updates its running statistics.
            if partial_bn:
                if norm_kwargs is not None:
                    norm_kwargs['use_global_stats'] = True
                else:
                    norm_kwargs = {}
                    norm_kwargs['use_global_stats'] = True

            self.features.add(
                _make_basic_conv(in_channels=32,
                                 channels=32,
                                 kernel_size=3,
                                 padding=(1, 0, 0),
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            self.features.add(
                _make_basic_conv(in_channels=32,
                                 channels=64,
                                 kernel_size=3,
                                 padding=1,
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            # Spatial-only downsampling: stride 1 on the temporal axis.
            self.features.add(
                nn.MaxPool3D(pool_size=3, strides=(1, 2, 2),
                             padding=(1, 0, 0)))
            self.features.add(
                _make_basic_conv(in_channels=64,
                                 channels=80,
                                 kernel_size=1,
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            self.features.add(
                _make_basic_conv(in_channels=80,
                                 channels=192,
                                 kernel_size=3,
                                 padding=(1, 0, 0),
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            self.features.add(
                nn.MaxPool3D(pool_size=3, strides=(1, 2, 2),
                             padding=(1, 0, 0)))
            # --- Inception blocks A-E (3D variants of the 2D modules),
            # then global average pooling over (T, H, W).
            self.features.add(_make_A(192, 32, 'A1_', norm_layer, norm_kwargs))
            self.features.add(_make_A(256, 64, 'A2_', norm_layer, norm_kwargs))
            self.features.add(_make_A(288, 64, 'A3_', norm_layer, norm_kwargs))
            self.features.add(_make_B('B_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 128, 'C1_', norm_layer,
                                      norm_kwargs))
            self.features.add(_make_C(768, 160, 'C2_', norm_layer,
                                      norm_kwargs))
            self.features.add(_make_C(768, 160, 'C3_', norm_layer,
                                      norm_kwargs))
            self.features.add(_make_C(768, 192, 'C4_', norm_layer,
                                      norm_kwargs))
            self.features.add(_make_D('D_', norm_layer, norm_kwargs))
            self.features.add(_make_E(1280, 'E1_', norm_layer, norm_kwargs))
            self.features.add(_make_E(2048, 'E2_', norm_layer, norm_kwargs))
            self.features.add(nn.GlobalAvgPool3D())

            # --- Classifier head: dropout + dense. `self.output` is also
            # kept as an attribute so consumers can reach the final layer.
            self.head = nn.HybridSequential(prefix='')
            self.head.add(nn.Dropout(rate=self.dropout_ratio))
            self.output = nn.Dense(
                units=nclass,
                in_units=self.feat_dim,
                weight_initializer=init.Normal(sigma=self.init_std))
            self.head.add(self.output)

            self.features.initialize(ctx=ctx)
            self.head.initialize(ctx=ctx)

            # --- Inflate 2D ImageNet weights into the 3D kernels, unless
            # full 3D pretrained weights will be loaded anyway.
            if pretrained_base and not pretrained:
                inceptionv3_2d = inception_v3(pretrained=True)
                weights2d = inceptionv3_2d.collect_params()
                weights3d = self.collect_params()
                assert len(weights2d.keys()) == len(
                    weights3d.keys()), 'Number of parameters should be same.'

                # Parameters are matched purely by enumeration order, which
                # assumes the 2D and 3D models declare layers in the same
                # sequence — TODO(review): fragile; verify if either model
                # definition changes.
                dict2d = {}
                for key_id, key_name in enumerate(weights2d.keys()):
                    dict2d[key_id] = key_name

                dict3d = {}
                for key_id, key_name in enumerate(weights3d.keys()):
                    dict3d[key_id] = key_name

                # Map each 2D parameter name to its 3D counterpart.
                dict_transform = {}
                for key_id, key_name in dict3d.items():
                    dict_transform[dict2d[key_id]] = key_name

                cnt = 0
                for key2d, key3d in dict_transform.items():
                    if 'conv' in key3d:
                        # Inflate: replicate the 2D kernel along the new
                        # temporal axis and divide by its length so the
                        # summed activations match the 2D response.
                        temporal_dim = weights3d[key3d].shape[2]
                        temporal_2d = nd.expand_dims(weights2d[key2d].data(),
                                                     axis=2)
                        inflated_2d = nd.broadcast_to(
                            temporal_2d, shape=[0, 0, temporal_dim, 0, 0
                                                ]) / temporal_dim
                        assert inflated_2d.shape == weights3d[
                            key3d].shape, 'the shape of %s and %s does not match. ' % (
                                key2d, key3d)
                        weights3d[key3d].set_data(inflated_2d)
                        cnt += 1
                        # NOTE(review): progress reporting via print; a
                        # logger would be preferable.
                        print('%s is done with shape: ' % (key3d),
                              weights3d[key3d].shape)
                    if 'batchnorm' in key3d:
                        # BatchNorm parameters are 1-D: copy them directly.
                        assert weights2d[key2d].shape == weights3d[
                            key3d].shape, 'the shape of %s and %s does not match. ' % (
                                key2d, key3d)
                        weights3d[key3d].set_data(weights2d[key2d].data())
                        cnt += 1
                        print('%s is done with shape: ' % (key3d),
                              weights3d[key3d].shape)
                    if 'dense' in key3d:
                        # Classifier weights are not ported (class count may
                        # differ); counted so the final sanity check passes.
                        cnt += 1
                        print('%s is skipped with shape: ' % (key3d),
                              weights3d[key3d].shape)

                assert cnt == len(
                    weights2d.keys()
                ), 'Not all parameters have been ported, check the initialization.'