def __init__(self, nclass, pretrained_base=True,
             partial_bn=True, dropout_ratio=0.8, init_std=0.001,
             feat_dim=1024, num_segments=1, num_crop=1, **kwargs):
    """Assemble the action-recognition network from a ``googlenet`` backbone.

    The layers of a ``googlenet`` model are re-attached to this block under
    their original attribute names, followed by a 7x7 average pool, a dropout
    layer and a freshly initialized dense output layer.

    Parameters
    ----------
    nclass : int
        Number of units of the dense output layer.
    pretrained_base : bool
        Forwarded to ``googlenet`` as ``pretrained``.
    partial_bn : bool
        Forwarded to ``googlenet`` as ``partial_bn``.
    dropout_ratio : float
        Rate passed to the ``nn.Dropout`` layer.
    init_std : float
        Sigma of the ``init.Normal`` initializer of the output layer weights.
    feat_dim : int
        ``in_units`` of the dense output layer.
    num_segments : int
        Stored on the instance; not otherwise used in this constructor.
    num_crop : int
        Stored on the instance; not otherwise used in this constructor.
    **kwargs
        Extra keyword arguments forwarded to ``googlenet`` (they are not
        passed to the parent constructor).
    """
    super(ActionRecInceptionV1, self).__init__()

    # Plain configuration values kept on the instance.
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_dim = feat_dim

    backbone = googlenet(pretrained=pretrained_base,
                         partial_bn=partial_bn,
                         **kwargs)

    # Backbone layers reused verbatim, listed in the order in which they are
    # attached to this block.
    reused_layers = (
        'conv1', 'maxpool1',
        'conv2', 'conv3', 'maxpool2',
        'inception3a', 'inception3b', 'maxpool3',
        'inception4a', 'inception4b', 'inception4c',
        'inception4d', 'inception4e', 'maxpool4',
        'inception5a', 'inception5b',
    )
    for layer_name in reused_layers:
        # setattr goes through the same attribute-assignment path as
        # ``self.<name> = backbone.<name>``.
        setattr(self, layer_name, getattr(backbone, layer_name))

    # Classification head built on top of the reused backbone layers.
    self.avgpool = nn.AvgPool2D(pool_size=7)
    self.dropout = nn.Dropout(self.dropout_ratio)
    head_init = init.Normal(sigma=self.init_std)
    self.output = nn.Dense(units=nclass,
                           in_units=self.feat_dim,
                           weight_initializer=head_init)
    # Only the new output layer is initialized here, using initialize()'s
    # defaults.
    self.output.initialize()
def __init__(self, nclass=1000, norm_layer=BatchNorm, num_segments=1,
             norm_kwargs=None, partial_bn=False, pretrained_base=True,
             dropout_ratio=0.5, init_std=0.01, ctx=None, **kwargs):
    """Build the 3D Inception-V1 network and optionally port 2D weights.

    The constructor assembles ``self.features`` (3D conv / pooling / Mixed
    blocks ending in a global average pool) and ``self.head`` (dropout plus a
    dense output layer), initializes both on ``ctx`` and, when
    ``pretrained_base`` is true, copies the parameters of a pretrained 2D
    ``googlenet`` into this network.

    Parameters
    ----------
    nclass : int
        Number of units of the dense output layer.
    norm_layer : object
        Normalization layer passed to every ``_make_*`` helper.
    num_segments : int
        Stored on the instance; not otherwise used in this constructor.
    norm_kwargs : dict or None
        Extra arguments passed to the ``_make_*`` helpers for the
        normalization layers. The dict supplied by the caller is never
        modified.
    partial_bn : bool
        When true, every layer added after the first conv + max-pool pair
        receives ``use_global_stats=True`` in its ``norm_kwargs``.
    pretrained_base : bool
        When true, weights of ``googlenet(pretrained=True)`` are ported.
    dropout_ratio : float
        Rate of the dropout layer in ``self.head``.
    init_std : float
        Sigma of the ``init.Normal`` initializer of the output layer weights.
    ctx : object
        Passed to ``initialize`` of ``self.features`` and ``self.head``.
    **kwargs
        Forwarded to the parent constructor.

    Raises
    ------
    AssertionError
        If the 2D and 3D networks do not have the same number of parameters,
        if a ported parameter's shape does not match its target, or if not
        every parameter was handled.
    """
    super(I3D_InceptionV1, self).__init__(**kwargs)
    self.num_segments = num_segments
    self.feat_dim = 1024
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std

    # NOTE(review): the collapsed source does not show where the name_scope
    # block ends; all block construction and weight porting is kept inside
    # it here -- confirm against the original layout.
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')

        # Stem: the first conv always uses the norm_kwargs exactly as given.
        self.features.add(_make_basic_conv(in_channels=3, channels=64, kernel_size=7,
                                           strides=2, padding=3,
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2),
                                       padding=(0, 1, 1)))

        if partial_bn:
            # Work on a copy so the caller's dict is not mutated as a side
            # effect of constructing the network.
            norm_kwargs = dict(norm_kwargs) if norm_kwargs is not None else {}
            norm_kwargs['use_global_stats'] = True

        self.features.add(_make_basic_conv(in_channels=64, channels=64, kernel_size=1,
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=64, channels=192, kernel_size=3,
                                           padding=(1, 1, 1),
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2),
                                       padding=(0, 1, 1)))

        self.features.add(_make_Mixed_3a(192, 32, 'Mixed_3a_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_3b(256, 64, 'Mixed_3b_', norm_layer, norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(2, 2, 2), padding=(1, 1, 1)))

        self.features.add(_make_Mixed_4a(480, 64, 'Mixed_4a_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4b(512, 64, 'Mixed_4b_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4c(512, 64, 'Mixed_4c_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4d(512, 64, 'Mixed_4d_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4e(528, 128, 'Mixed_4e_', norm_layer, norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=2, strides=(2, 2, 2)))

        self.features.add(_make_Mixed_5a(832, 128, 'Mixed_5a_', norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_5b(832, 128, 'Mixed_5b_', norm_layer, norm_kwargs))
        self.features.add(nn.GlobalAvgPool3D())

        # Classification head: dropout followed by the dense output layer.
        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                               weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.output)

        self.features.initialize(ctx=ctx)
        self.head.initialize(ctx=ctx)

        if pretrained_base:
            inceptionv1_2d = googlenet(pretrained=True)
            weights2d = inceptionv1_2d.collect_params()
            weights3d = self.collect_params()
            assert len(weights2d.keys()) == len(weights3d.keys()), \
                'Number of parameters should be same.'

            # Parameters are matched purely by position: the i-th parameter
            # of the 2D network is ported to the i-th parameter of this one.
            cnt = 0
            for key2d, key3d in zip(list(weights2d.keys()), list(weights3d.keys())):
                param2d = weights2d[key2d]
                param3d = weights3d[key3d]
                # Exactly one branch handles each parameter, so a name that
                # happens to contain several of the substrings is never
                # processed or counted twice.
                if 'conv' in key3d:
                    # Insert a new axis at position 2 of the 2D kernel,
                    # repeat it along that axis and divide by the number of
                    # repeats so the sum over the new axis equals the 2D
                    # kernel.
                    temporal_dim = param3d.shape[2]
                    temporal_2d = nd.expand_dims(param2d.data(), axis=2)
                    # A 0 in the target shape keeps the input's size there.
                    inflated_2d = nd.broadcast_to(
                        temporal_2d, shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                    assert inflated_2d.shape == param3d.shape, \
                        'the shape of %s and %s does not match. ' % (key2d, key3d)
                    param3d.set_data(inflated_2d)
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), param3d.shape)
                elif 'batchnorm' in key3d:
                    # Normalization parameters are copied unchanged.
                    assert param2d.shape == param3d.shape, \
                        'the shape of %s and %s does not match. ' % (key2d, key3d)
                    param3d.set_data(param2d.data())
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), param3d.shape)
                elif 'dense' in key3d:
                    # The dense output layer keeps its fresh initialization.
                    cnt += 1
                    print('%s is skipped with shape: ' % (key3d), param3d.shape)

            assert cnt == len(weights2d.keys()), \
                'Not all parameters have been ported, check the initialization.'