def __init__(self, in_channel, **kwargs):
    """Temporal Convolution Layer: two parallel temporal-only conv branches.

    Each branch applies a Conv3D whose kernel spans only the time axis
    (3 or 5 frames, "same" temporal padding), a ReLU, and a temporal
    max-pool that halves the time dimension.
    """
    super(TCL, self).__init__()

    def _temporal_branch(kernel_t, pad_t):
        # One branch: temporal conv -> ReLU -> temporal max-pool.
        branch = nn.HybridSequential()
        branch.add(
            nn.Conv3D(in_channels=in_channel,
                      channels=32,
                      kernel_size=(kernel_t, 1, 1),
                      strides=(1, 1, 1),
                      padding=(pad_t, 0, 0),
                      weight_initializer=init.Xavier(),
                      bias_initializer='zero'),
            nn.Activation('relu'),
            nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1)))
        return branch

    # Branch 1 sees a 3-frame window, branch 2 a 5-frame window.
    self.branch1 = _temporal_branch(3, 1)
    self.branch2 = _temporal_branch(5, 2)
def __init__(self, **kwargs):
    """Voxel refiner: three conv/down-sample blocks, a two-layer dense
    bottleneck, and three transposed-conv/up-sample blocks ending in a
    sigmoid occupancy map.
    """
    super(Refiner_hybrid, self).__init__(**kwargs)

    def _down_block(width):
        # Conv3D -> BatchNorm -> LeakyReLU -> 2x max-pool.
        blk = nn.HybridSequential()
        blk.add(nn.Conv3D(width, kernel_size=4, padding=2),
                nn.BatchNorm(in_channels=width),
                nn.LeakyReLU(.2),
                nn.MaxPool3D(pool_size=2))
        return blk

    def _dense_block(units):
        blk = nn.HybridSequential()
        blk.add(nn.Dense(units, activation='relu'))
        return blk

    def _up_block(width):
        # Transposed conv doubles each spatial dimension; BN + ReLU.
        blk = nn.HybridSequential()
        blk.add(nn.Conv3DTranspose(width, kernel_size=4, strides=2,
                                   padding=1, use_bias=False),
                nn.BatchNorm(in_channels=width),
                nn.Activation('relu'))
        return blk

    # Encoder (blocks created in the same order as the original so Gluon
    # parameter names are unchanged).
    self.layer1 = _down_block(32)
    self.layer2 = _down_block(64)
    self.layer3 = _down_block(128)
    # Dense bottleneck.
    self.layer4 = _dense_block(2048)
    self.layer5 = _dense_block(8192)
    # Decoder.
    self.layer6 = _up_block(64)
    self.layer7 = _up_block(32)
    # Output: single-channel up-sampling conv + sigmoid.
    self.layer8 = nn.HybridSequential()
    self.layer8.add(nn.Conv3DTranspose(1, kernel_size=4, strides=2,
                                       padding=1, use_bias=False),
                    nn.Activation('sigmoid'))
def __init__(self, block, layers, channels, classes=1000, thumbnail=False,
             caption_length=50, **kwargs):
    """3D ResNet-V1 trunk with a caption-sized dense output head.

    Parameters
    ----------
    block : residual block class, consumed by ``self._make_layer``.
    layers : list of int — number of blocks per residual stage.
    channels : list of int — channel widths, one longer than ``layers``
        (entry i is the input width of stage i, entry i+1 its output).
    classes : int — only referenced by the commented-out dense head below.
    thumbnail : bool — if True, use a small 3x3 stem conv; otherwise the
        ImageNet-style 7x7/stride-2 conv + BN/ReLU/max-pool stem.
    caption_length : int — the output layer has
        ``caption_length * caption_length`` units.
    """
    super(ResNetV1, self).__init__(**kwargs)
    assert len(layers) == len(channels) - 1
    with self.name_scope():
        self.caption_length = caption_length
        self.features = nn.HybridSequential(prefix='')
        if thumbnail:
            # Small-input stem: stride 1, no padding change.
            self.features.add(_conv3x3(channels[0], 1, 0))
        else:
            # Large-input stem: 7x7x7 conv stride 2, then BN/ReLU/pool.
            self.features.add(
                nn.Conv3D(channels[0], 7, 2, 3, use_bias=False))
            self.features.add(nn.BatchNorm())
            self.features.add(nn.Activation('relu'))
            self.features.add(nn.MaxPool3D(3, 2, 1))
        for i, num_layer in enumerate(layers):
            # First stage keeps resolution; later stages down-sample by 2.
            stride = 1 if i == 0 else 2
            self.features.add(
                self._make_layer(block, num_layer, channels[i + 1],
                                 stride, i + 1, in_channels=channels[i]))
        self.features.add(nn.GlobalAvgPool3D())
        #self.features.add(nn.Dense(classes, in_units=in_channels))
        self.output = nn.Dense(caption_length * caption_length)
def _make_3d_feature(self, config_3d_conv, config_3d_pool, batch_normal):
    """Assemble the 3D conv/pool feature trunk.

    ``config_3d_conv`` is ``(conv_layer, conv_channels)`` — per-stage conv
    counts and widths; ``config_3d_pool`` is ``(pool_size, pool_stride,
    pool_padding)`` per stage.  Returns a HybridSequential that ends with
    a Flatten layer.
    """
    trunk = nn.HybridSequential(prefix='')
    conv_layer, conv_channels = config_3d_conv
    pool_size, pool_stride, pool_padding = config_3d_pool
    # All five per-stage config lists must line up.
    assert len(conv_layer) == len(conv_channels) == len(pool_size) == len(
        pool_stride) == len(pool_padding)
    stages = zip(conv_layer, conv_channels, pool_size, pool_stride,
                 pool_padding)
    for n_convs, width, psize, pstride, ppad in stages:
        # Each stage: n_convs 3x3x3 "same" convs (+ optional BN) with ReLU,
        # then one max-pool.
        for _ in range(n_convs):
            trunk.add(
                nn.Conv3D(channels=width,
                          kernel_size=(3, 3, 3),
                          strides=(1, 1, 1),
                          padding=(1, 1, 1),
                          weight_initializer=init.Xavier(
                              rnd_type='gaussian', factor_type='out',
                              magnitude=2),
                          bias_initializer='zero'))
            if batch_normal:
                trunk.add(nn.BatchNorm())
            trunk.add(nn.Activation('relu'))
        trunk.add(nn.MaxPool3D(pool_size=psize, strides=pstride,
                               padding=ppad))
    # flatten to (N, 8192)
    trunk.add(nn.Flatten())
    return trunk
def test_pool():
    """Forward-shape checks for 1D/2D/3D pooling layers, plus ceil_mode."""
    # (layers, input_shape) per dimensionality.
    cases = [
        ([nn.MaxPool1D(), nn.MaxPool1D(3), nn.MaxPool1D(3, 2),
          nn.AvgPool1D(), nn.AvgPool1D(count_include_pad=False),
          nn.GlobalAvgPool1D()],
         (1, 2, 10)),
        ([nn.MaxPool2D(), nn.MaxPool2D((3, 3)), nn.MaxPool2D(3, 2),
          nn.AvgPool2D(), nn.AvgPool2D(count_include_pad=False),
          nn.GlobalAvgPool2D()],
         (1, 2, 10, 10)),
        ([nn.MaxPool3D(), nn.MaxPool3D((3, 3, 3)), nn.MaxPool3D(3, 2),
          nn.AvgPool3D(), nn.AvgPool3D(count_include_pad=False),
          nn.GlobalAvgPool3D()],
         (1, 2, 10, 10, 10)),
    ]
    for layers, shape in cases:
        for layer in layers:
            check_layer_forward(layer, shape)

    # test ceil_mode: pool 3 over a 10x10 input — floor gives 3x3,
    # ceil gives 4x4.
    x = mx.nd.zeros((2, 2, 10, 10))
    for ceil, expected in ((False, (2, 2, 3, 3)), (True, (2, 2, 4, 4))):
        layer = nn.MaxPool2D(3, ceil_mode=ceil)
        layer.collect_params().initialize()
        assert (layer(x).shape == expected)
def __init__(self, dr_rate, **kwargs):
    """LipNet: three Conv3D/InstanceNorm/Dropout/MaxPool stages, two
    bidirectional GRUs, and a dense output of 27+1 units (presumably
    27 symbols plus a CTC blank — confirm against the training loss).

    Parameters
    ----------
    dr_rate : float — dropout rate after each conv stage; ``axes=(1, 2)``
        drops entire channel/time slices rather than single elements.
    """
    super(LipNet, self).__init__(**kwargs)
    with self.name_scope():
        # Stage 1: spatial stride 2 halves H and W; time length preserved.
        self.conv1 = nn.Conv3D(32, kernel_size=(3, 5, 5), strides=(1, 2, 2),
                               padding=(1, 2, 2))
        self.bn1 = nn.InstanceNorm(in_channels=32)
        self.dr1 = nn.Dropout(dr_rate, axes=(1, 2))
        self.pool1 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
        # Stage 2: same 5x5 spatial kernel, stride 1.
        self.conv2 = nn.Conv3D(64, kernel_size=(3, 5, 5), strides=(1, 1, 1),
                               padding=(1, 2, 2))
        self.bn2 = nn.InstanceNorm(in_channels=64)
        self.dr2 = nn.Dropout(dr_rate, axes=(1, 2))
        self.pool2 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
        # Stage 3.
        # NOTE(review): padding (1, 2, 2) with a 3x3x3 kernel over-pads the
        # spatial dims (output grows by 2 per dim); (1, 1, 1) would be
        # "same" — confirm against the GRU input reshape downstream.
        self.conv3 = nn.Conv3D(96, kernel_size=(3, 3, 3), strides=(1, 1, 1),
                               padding=(1, 2, 2))
        self.bn3 = nn.InstanceNorm(in_channels=96)
        self.dr3 = nn.Dropout(dr_rate, axes=(1, 2))
        self.pool3 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
        # Sequence model: two stacked bidirectional GRUs (256 units/direction).
        self.gru1 = rnn.GRU(256, bidirectional=True)
        self.gru2 = rnn.GRU(256, bidirectional=True)
        # Per-timestep output (flatten=False keeps the time axis).
        self.dense = nn.Dense(27+1, flatten=False)
def __init__(self, out_channels, **kwargs):
    """Transition block: BN -> ReLU -> 1x1x1 conv -> 2x down-sampling pool."""
    super(TransitionBlockDown, self).__init__(**kwargs)
    pipeline = nn.HybridSequential()
    pipeline.add(nn.BatchNorm())
    pipeline.add(nn.Activation(activation='relu'))
    # 1x1x1 conv adjusts the width to `out_channels` without spatial mixing.
    pipeline.add(nn.Conv3D(channels=out_channels, kernel_size=1,
                           strides=1, use_bias=False))
    # Halve depth, height and width.
    pipeline.add(nn.MaxPool3D(pool_size=2, strides=2))
    self.ops = pipeline
def __init__(self, c1, c2):
    """Reduction block: three BasicConv layers going from c1 to c2 channels,
    plus an overlapping (stride-1) 2x max-pool.

    Parameters
    ----------
    c1 : channel count for the first BasicConv.
    c2 : channel count for the second and third BasicConvs.
    """
    # Fix: the original placed the triple-quoted string AFTER the super()
    # call, making it a discarded expression statement rather than the
    # method docstring; it is now the real (first-statement) docstring.
    super(Reduction, self).__init__()
    self.conv1 = BasicConv(c1)
    self.conv2 = BasicConv(c2)
    self.conv3 = BasicConv(c2)
    # strides=1 (not 2): the pool overlaps and only trims one voxel per dim.
    self.pool = nn.MaxPool3D(pool_size=2, strides=1)
def __init__(self, nclass, input_channel=3, batch_normal=True,
             dropout_ratio=0.8, init_std=0.001, **kwargs):
    """Pseudo-3D (P3D) network: spatial-only stem plus four stages of
    P3D blocks cycling through the 'A'/'B'/'C' designs.

    Parameters
    ----------
    nclass : int — number of output classes.
    input_channel : int — input channels of the stem conv (3 for RGB).
    batch_normal : bool — accepted but not used in this constructor
        (NOTE(review): confirm whether it should gate BN creation).
    dropout_ratio : float — stored only; no Dropout layer is created here.
    init_std : float — stddev of the output layer's Normal initializer.
    """
    super(P3D, self).__init__()
    self.nclass = nclass
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    # Block expansion factor (1 here, so stage widths below are literal).
    self.expansion = 1
    with self.name_scope():
        # Stem: spatial-only 7x7 conv, stride 2, then BN/ReLU/3x3 pool.
        self.conv1 = nn.Conv3D(in_channels=input_channel, channels=64,
                               kernel_size=(1, 7, 7), strides=(1, 2, 2),
                               padding=(0, 3, 3), use_bias=False)
        self.bn1 = nn.BatchNorm(in_channels=64)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2),
                                    padding=(0, 1, 1))
        # Stage 2: A/B/C cycle at width 64; stride 2 on the first block.
        self.conv2 = nn.HybridSequential()
        self.conv2.add(
            P3D_block('A', 64, 64 * self.expansion, 2),
            P3D_block('B', 64 * self.expansion, 64 * self.expansion),
            P3D_block('C', 64 * self.expansion, 64 * self.expansion))
        # Stage 3: width 128, four blocks.
        self.conv3 = nn.HybridSequential()
        self.conv3.add(
            P3D_block('A', 64 * self.expansion, 128 * self.expansion, 2),
            P3D_block('B', 128 * self.expansion, 128 * self.expansion),
            P3D_block('C', 128 * self.expansion, 128 * self.expansion),
            P3D_block('A', 128 * self.expansion, 128 * self.expansion))
        # Stage 4: width 256, six blocks (B/C/A cycle twice).
        self.conv4 = nn.HybridSequential()
        self.conv4.add(
            P3D_block('B', 128 * self.expansion, 256 * self.expansion, 2),
            P3D_block('C', 256 * self.expansion, 256 * self.expansion),
            P3D_block('A', 256 * self.expansion, 256 * self.expansion),
            P3D_block('B', 256 * self.expansion, 256 * self.expansion),
            P3D_block('C', 256 * self.expansion, 256 * self.expansion),
            P3D_block('A', 256 * self.expansion, 256 * self.expansion))
        # Stage 5: width 512, three blocks.
        self.conv5 = nn.HybridSequential()
        self.conv5.add(
            P3D_block('B', 256 * self.expansion, 512 * self.expansion, 2),
            P3D_block('C', 512 * self.expansion, 512 * self.expansion),
            # NOTE(review): the final width is a bare 512 (no
            # ``* self.expansion``) — equivalent while expansion == 1,
            # but inconsistent with the other stages otherwise.
            P3D_block('A', 512 * self.expansion, 512))
        # Head: average pool + dense classifier over the 512-dim feature.
        self.avg_pool = nn.AvgPool3D(pool_size=(1, 3, 3))
        self.output = nn.Dense(
            in_units=512, units=nclass,
            weight_initializer=init.Normal(sigma=init_std))
def _make_branch(use_pool=None, *conv_settings):
    """Build one Inception branch: an optional stride-1 3x3x3 max-pool,
    then one unit3d conv per settings tuple.

    Each tuple in ``conv_settings`` is (channels, kernel_size, strides,
    padding); entries that are None fall back to _make_unit3d defaults.
    """
    branch = nn.HybridSequential(prefix='')
    if use_pool == 'max':
        # 1 is for the depth dimension for video inflation
        branch.add(
            nn.MaxPool3D(pool_size=(3, 3, 3), strides=(1, 1, 1),
                         padding=(1, 1, 1)))
    names = ('channels', 'kernel_size', 'strides', 'padding')
    for setting in conv_settings:
        conv_kwargs = {name: value
                       for name, value in zip(names, setting)
                       if value is not None}
        branch.add(_make_unit3d(**conv_kwargs))
    return branch
def _make_branch(use_pool, norm_layer, norm_kwargs, *conv_settings):
    """Build one Inception branch: an optional 3x3x3 stride-1 pooling stage
    ('avg' or 'max'), then one basic conv unit per settings tuple of
    (in_channels, channels, kernel_size, strides, padding); None entries
    fall back to _make_basic_conv defaults.
    """
    branch = nn.HybridSequential(prefix='')
    pool_types = {'avg': nn.AvgPool3D, 'max': nn.MaxPool3D}
    if use_pool in pool_types:
        branch.add(pool_types[use_pool](pool_size=3, strides=1, padding=1))
    names = ('in_channels', 'channels', 'kernel_size', 'strides', 'padding')
    for setting in conv_settings:
        conv_kwargs = {}
        for name, value in zip(names, setting):
            if value is None:
                continue
            # in_channels / channels are positional for _make_basic_conv.
            if name == 'in_channels':
                in_channels = value
            elif name == 'channels':
                channels = value
            else:
                conv_kwargs[name] = value
        branch.add(_make_basic_conv(in_channels, channels,
                                    norm_layer, norm_kwargs, **conv_kwargs))
    return branch
def __init__(self, nclass, input_channel=3, batch_normal=True,
             dropout_ratio=0.8, init_std=0.001, **kwargs):
    """FstCN (factorized spatio-temporal CNN).

    Four spatial conv layers SCL1-SCL4 (1xkxk kernels, so only H/W are
    mixed), then two parallel heads — a temporal head (Conv3D + TCL) and a
    spatial head (Conv2D) — each with its own two-layer dense stack, plus a
    fusion ``fc`` and the classification ``out`` layer.

    Parameters
    ----------
    nclass : int — number of classes.
    input_channel : int — accepted but unused; SCL1 hard-codes
        ``in_channels=3`` (NOTE(review): confirm intent).
    batch_normal : bool — accepted but unused; BatchNorm is always added.
    dropout_ratio : float — dropout rate inside the dense stacks.
    init_std : float — stddev for the dense layers' Normal initializers.
    """
    super(FstCN, self).__init__()
    self.nclass = nclass
    # 16 frames + 1 extra (presumably for temporal differencing — confirm
    # against the data pipeline).
    self.new_length = 16 + 1
    #self.feat_dim = 4096
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    # self.config_3d_layer = [2,2,2,2]
    # self.config_3d_temporal_stride = [1,2,2,2]
    with self.name_scope():
        # SCL1: 7x7 spatial conv, stride 2, then ReLU/BN and 3x3 pool.
        self.SCL1 = nn.HybridSequential()
        self.SCL1.add(
            nn.Conv3D(in_channels=3, channels=96, kernel_size=(1, 7, 7),
                      strides=(1, 2, 2), padding=(0, 3, 3),
                      weight_initializer=init.Xavier(),
                      bias_initializer='zero'),
            nn.Activation('relu'),
            nn.BatchNorm(),
            nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2)))
        # SCL2: 5x5 spatial conv, stride 2.
        self.SCL2 = nn.HybridSequential()
        self.SCL2.add(
            nn.Conv3D(in_channels=96, channels=256, kernel_size=(1, 5, 5),
                      strides=(1, 2, 2), padding=(0, 2, 2),
                      weight_initializer=init.Xavier(),
                      bias_initializer='zero'),
            nn.Activation('relu'),
            nn.BatchNorm(),
            nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2)))
        # SCL3/SCL4: 3x3 "same" spatial convs, no pooling.
        self.SCL3 = nn.HybridSequential()
        self.SCL3.add(
            nn.Conv3D(in_channels=256, channels=512, kernel_size=(1, 3, 3),
                      strides=(1, 1, 1), padding=(0, 1, 1),
                      weight_initializer=init.Xavier(),
                      bias_initializer='zero'),
            nn.Activation('relu'),
            nn.BatchNorm())
        self.SCL4 = nn.HybridSequential()
        self.SCL4.add(
            nn.Conv3D(in_channels=512, channels=512, kernel_size=(1, 3, 3),
                      strides=(1, 1, 1), padding=(0, 1, 1),
                      weight_initializer=init.Xavier(),
                      bias_initializer='zero'),
            nn.Activation('relu'),
            nn.BatchNorm())
        # Temporal head: 3D conv + pool, then the TCL module (parallel
        # temporal convolutions).
        self.Parallel_temporal = nn.HybridSequential()
        self.Parallel_temporal.add(
            nn.Conv3D(in_channels=512, channels=128, kernel_size=(1, 3, 3),
                      strides=(1, 1, 1), padding=(0, 1, 1),
                      weight_initializer=init.Xavier(),
                      bias_initializer='zero'),
            nn.Activation('relu'),
            nn.BatchNorm(),
            nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 3, 3)),
            TCL(128))
        # Spatial head: single 2D conv stage.
        self.Parallel_spatial = nn.HybridSequential()
        self.Parallel_spatial.add(
            nn.Conv2D(in_channels=512, channels=128, kernel_size=(3, 3),
                      strides=(1, 1), padding=(1, 1),
                      weight_initializer=init.Xavier(),
                      bias_initializer='zero'),
            nn.Activation('relu'),
            nn.MaxPool2D(pool_size=(3, 3), strides=(3, 3)))
        # Per-head two-layer dense stacks with dropout in between.
        self.tem_fc = nn.HybridSequential()
        self.tem_fc.add(
            nn.Dense(in_units=8192, units=4096,
                     weight_initializer=init.Normal(sigma=init_std)),
            nn.Dropout(rate=dropout_ratio),
            nn.Dense(in_units=4096, units=2048,
                     weight_initializer=init.Normal(sigma=init_std)),
        )
        self.spa_fc = nn.HybridSequential()
        self.spa_fc.add(
            nn.Dense(in_units=2048, units=4096,
                     weight_initializer=init.Normal(sigma=init_std)),
            nn.Dropout(rate=dropout_ratio),
            nn.Dense(in_units=4096, units=2048,
                     weight_initializer=init.Normal(sigma=init_std)),
        )
        # Fusion layer: in_units=4096 (presumably the two 2048-dim head
        # outputs concatenated — confirm in the forward pass) + classifier.
        self.fc = nn.Dense(in_units=4096, units=2048,
                           weight_initializer=init.Normal(sigma=init_std))
        self.out = nn.Dense(in_units=2048, units=nclass,
                            weight_initializer=init.Normal(sigma=init_std))
def __init__(self, nclass, block=Bottleneck, layers=None, pretrained=False,
             pretrained_base=False, num_segments=1, num_crop=1, bn_eval=True,
             bn_frozen=False, partial_bn=False, frozen_stages=-1,
             dropout_ratio=0.5, init_std=0.01, alpha=8, beta_inv=8,
             fusion_conv_channel_ratio=2, fusion_kernel_size=5,
             width_per_group=64, num_groups=1, slow_temporal_stride=16,
             fast_temporal_stride=2, slow_frames=4, fast_frames=32,
             norm_layer=BatchNorm, norm_kwargs=None, ctx=None, **kwargs):
    """SlowFast network: a "slow" pathway, a thin high-frame-rate "fast"
    pathway (1/beta_inv the width), and time-strided lateral connections
    that feed fast features into the slow pathway.

    Key hyper-parameters
    --------------------
    alpha : temporal stride of the lateral convs.
    beta_inv : inverse channel ratio of the fast pathway.
    fusion_conv_channel_ratio / fusion_kernel_size : channel multiplier
        and temporal kernel size of the lateral connections.
    layers : blocks per residual stage (e.g. [3, 4, 6, 3]).
    """
    super(SlowFast, self).__init__()
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.alpha = alpha
    self.beta_inv = beta_inv
    self.fusion_conv_channel_ratio = fusion_conv_channel_ratio
    self.fusion_kernel_size = fusion_kernel_size
    self.width_per_group = width_per_group
    self.num_groups = num_groups
    self.dim_inner = self.num_groups * self.width_per_group
    self.out_dim_ratio = self.beta_inv // self.fusion_conv_channel_ratio
    self.slow_temporal_stride = slow_temporal_stride
    self.fast_temporal_stride = fast_temporal_stride
    self.slow_frames = slow_frames
    self.fast_frames = fast_frames
    with self.name_scope():
        # build fast pathway
        # NOTE(review): `fast` is created only for its 'fast_' name scope
        # and never populated or stored; the layers are attributes.
        fast = nn.HybridSequential(prefix='fast_')
        with fast.name_scope():
            # Fast stem: temporal kernel 5 keeps fine temporal detail.
            self.fast_conv1 = nn.Conv3D(
                in_channels=3,
                channels=self.width_per_group // self.beta_inv,
                kernel_size=(5, 7, 7), strides=(1, 2, 2),
                padding=(2, 3, 3), use_bias=False)
            self.fast_bn1 = norm_layer(
                in_channels=self.width_per_group // self.beta_inv,
                **({} if norm_kwargs is None else norm_kwargs))
            self.fast_relu = nn.Activation('relu')
            self.fast_maxpool = nn.MaxPool3D(
                pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            # Residual stages res2-res5; head_conv=3 means temporal convs
            # in every stage of the fast pathway.
            self.fast_res2 = self._make_layer_fast(
                inplanes=self.width_per_group // self.beta_inv,
                planes=self.dim_inner // self.beta_inv,
                num_blocks=layers[0], head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='fast_res2_')
            self.fast_res3 = self._make_layer_fast(
                inplanes=self.width_per_group * 4 // self.beta_inv,
                planes=self.dim_inner * 2 // self.beta_inv,
                num_blocks=layers[1], strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='fast_res3_')
            self.fast_res4 = self._make_layer_fast(
                inplanes=self.width_per_group * 8 // self.beta_inv,
                planes=self.dim_inner * 4 // self.beta_inv,
                num_blocks=layers[2], strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='fast_res4_')
            self.fast_res5 = self._make_layer_fast(
                inplanes=self.width_per_group * 16 // self.beta_inv,
                planes=self.dim_inner * 8 // self.beta_inv,
                num_blocks=layers[3], strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='fast_res5_')
        # build lateral connections: temporal conv with stride alpha maps
        # fast features to fusion_conv_channel_ratio x their width so they
        # can be joined with the slow pathway.
        self.lateral_p1 = nn.HybridSequential(prefix='lateral_p1_')
        with self.lateral_p1.name_scope():
            self.lateral_p1.add(nn.Conv3D(
                in_channels=self.width_per_group // self.beta_inv,
                channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                kernel_size=(self.fusion_kernel_size, 1, 1),
                strides=(self.alpha, 1, 1),
                padding=(self.fusion_kernel_size // 2, 0, 0),
                use_bias=False))
            self.lateral_p1.add(norm_layer(
                in_channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_p1.add(nn.Activation('relu'))
        self.lateral_res2 = nn.HybridSequential(prefix='lateral_res2_')
        with self.lateral_res2.name_scope():
            self.lateral_res2.add(nn.Conv3D(
                in_channels=self.width_per_group * 4 // self.beta_inv,
                channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                kernel_size=(self.fusion_kernel_size, 1, 1),
                strides=(self.alpha, 1, 1),
                padding=(self.fusion_kernel_size // 2, 0, 0),
                use_bias=False))
            self.lateral_res2.add(norm_layer(
                in_channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res2.add(nn.Activation('relu'))
        self.lateral_res3 = nn.HybridSequential(prefix='lateral_res3_')
        with self.lateral_res3.name_scope():
            self.lateral_res3.add(nn.Conv3D(
                in_channels=self.width_per_group * 8 // self.beta_inv,
                channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                kernel_size=(self.fusion_kernel_size, 1, 1),
                strides=(self.alpha, 1, 1),
                padding=(self.fusion_kernel_size // 2, 0, 0),
                use_bias=False))
            self.lateral_res3.add(norm_layer(
                in_channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res3.add(nn.Activation('relu'))
        self.lateral_res4 = nn.HybridSequential(prefix='lateral_res4_')
        with self.lateral_res4.name_scope():
            self.lateral_res4.add(nn.Conv3D(
                in_channels=self.width_per_group * 16 // self.beta_inv,
                channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                kernel_size=(self.fusion_kernel_size, 1, 1),
                strides=(self.alpha, 1, 1),
                padding=(self.fusion_kernel_size // 2, 0, 0),
                use_bias=False))
            self.lateral_res4.add(norm_layer(
                in_channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res4.add(nn.Activation('relu'))
        # build slow pathway
        # NOTE(review): like `fast`, `slow` exists only for its name scope.
        slow = nn.HybridSequential(prefix='slow_')
        with slow.name_scope():
            # Slow stem: temporal kernel 1 — no temporal convolution until
            # res4/res5 (head_conv=3 there).
            self.slow_conv1 = nn.Conv3D(
                in_channels=3, channels=self.width_per_group,
                kernel_size=(1, 7, 7), strides=(1, 2, 2),
                padding=(0, 3, 3), use_bias=False)
            self.slow_bn1 = norm_layer(
                in_channels=self.width_per_group,
                **({} if norm_kwargs is None else norm_kwargs))
            self.slow_relu = nn.Activation('relu')
            self.slow_maxpool = nn.MaxPool3D(
                pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            # Slow-stage inplanes include the lateral features
            # (an extra width // out_dim_ratio channels).
            self.slow_res2 = self._make_layer_slow(
                inplanes=self.width_per_group + self.width_per_group // self.out_dim_ratio,
                planes=self.dim_inner,
                num_blocks=layers[0], head_conv=1,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='slow_res2_')
            self.slow_res3 = self._make_layer_slow(
                inplanes=self.width_per_group * 4 + self.width_per_group * 4 // self.out_dim_ratio,
                planes=self.dim_inner * 2,
                num_blocks=layers[1], strides=2, head_conv=1,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='slow_res3_')
            self.slow_res4 = self._make_layer_slow(
                inplanes=self.width_per_group * 8 + self.width_per_group * 8 // self.out_dim_ratio,
                planes=self.dim_inner * 4,
                num_blocks=layers[2], strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='slow_res4_')
            self.slow_res5 = self._make_layer_slow(
                inplanes=self.width_per_group * 16 + self.width_per_group * 16 // self.out_dim_ratio,
                planes=self.dim_inner * 8,
                num_blocks=layers[3], strides=2, head_conv=3,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                layer_name='slow_res5_')
        # build classifier: global average pool, dropout, then a dense
        # layer over the summed slow + fast feature widths.
        self.avg = nn.GlobalAvgPool3D()
        self.dp = nn.Dropout(rate=self.dropout_ratio)
        self.feat_dim = self.width_per_group * 32 // self.beta_inv + self.width_per_group * 32
        self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std),
                           use_bias=True)
        self.initialize(init.MSRAPrelu(), ctx=ctx)
def __init__(self, nclass, block, layers, shortcut_type='B',
             block_design=('A', 'B', 'C'), dropout_ratio=0.5,
             num_segments=1, num_crop=1, feat_ext=False, init_std=0.001,
             ctx=None, partial_bn=False, norm_layer=BatchNorm,
             norm_kwargs=None, **kwargs):
    """P3D ResNet: stages 1-3 use pseudo-3D residual blocks, stage 4 is 2D.

    Parameters
    ----------
    nclass : number of output classes.
    block : residual block class (defines ``expansion``).
    layers : blocks per residual stage.
    shortcut_type / block_design : stored for ``_make_res_layer``
        (presumably select the downsample style and the A/B/C cycle —
        confirm there).
    feat_ext : stored flag (feature-extraction mode — confirm in forward).
    partial_bn : freeze BN statistics for layers created after the stem.
    """
    super(P3D, self).__init__()
    self.shortcut_type = shortcut_type
    self.block_design = block_design
    self.partial_bn = partial_bn
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_ext = feat_ext
    self.inplanes = 64
    self.feat_dim = 512 * block.expansion
    with self.name_scope():
        # Spatial-only stem (temporal kernel 1), stride 2 in H/W.
        self.conv1 = nn.Conv3D(in_channels=3, channels=64,
                               kernel_size=(1, 7, 7), strides=(1, 2, 2),
                               padding=(0, 3, 3), use_bias=False)
        self.bn1 = norm_layer(
            in_channels=64, **({} if norm_kwargs is None else norm_kwargs))
        self.relu = nn.Activation('relu')
        self.pool = nn.MaxPool3D(pool_size=(2, 3, 3), strides=2,
                                 padding=(0, 1, 1))
        # Temporal-only pool, halves the time dimension.
        self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1),
                                  padding=0)
        if self.partial_bn:
            # Freeze BN statistics for everything created after the stem.
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True
        # 3D layers are only for (layers1, layers2 and layers3), layers4 is C2D
        self.depth_3d = sum(layers[:3])
        self.layer_cnt = 0
        self.layer1 = self._make_res_layer(block=block, planes=64,
                                           blocks=layers[0],
                                           layer_name='layer1_')
        self.layer2 = self._make_res_layer(block=block, planes=128,
                                           blocks=layers[1],
                                           spatial_stride=2,
                                           layer_name='layer2_')
        self.layer3 = self._make_res_layer(block=block, planes=256,
                                           blocks=layers[2],
                                           spatial_stride=2,
                                           layer_name='layer3_')
        self.layer4 = self._make_res_layer(block=block, planes=512,
                                           blocks=layers[3],
                                           spatial_stride=2,
                                           layer_name='layer4_')
        # Head: 2D global pool (layer4 is C2D, per the comment above),
        # dropout, dense classifier.
        self.avgpool = nn.GlobalAvgPool2D()
        self.dropout = nn.Dropout(rate=self.dropout_ratio)
        self.fc = nn.Dense(
            in_units=self.feat_dim, units=nclass,
            weight_initializer=init.Normal(sigma=self.init_std))
def __init__(self, nclass=1000, norm_layer=BatchNorm, num_segments=1,
             norm_kwargs=None, partial_bn=False, pretrained_base=True,
             dropout_ratio=0.5, init_std=0.01, ctx=None, **kwargs):
    """I3D built on Inception-V1 (GoogLeNet), with optional 2D->3D weight
    inflation from an ImageNet-pretrained GoogLeNet.

    Parameters
    ----------
    nclass : number of classes for the dense head.
    partial_bn : freeze BN statistics for all layers after the stem conv.
    pretrained_base : if True, inflate pretrained 2D GoogLeNet weights.
    """
    super(I3D_InceptionV1, self).__init__(**kwargs)
    self.num_segments = num_segments
    self.feat_dim = 1024
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        # Stem: inflated 7x7x7 conv, stride 2 spatially only after pooling.
        self.features.add(_make_basic_conv(in_channels=3, channels=64,
                                           kernel_size=7, strides=2,
                                           padding=3,
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3),
                                       strides=(1, 2, 2),
                                       padding=(0, 1, 1)))
        if partial_bn:
            # Freeze BN statistics for everything after the first conv.
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True
        self.features.add(_make_basic_conv(in_channels=64, channels=64,
                                           kernel_size=1,
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=64, channels=192,
                                           kernel_size=3, padding=(1, 1, 1),
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3),
                                       strides=(1, 2, 2),
                                       padding=(0, 1, 1)))
        # Inception mixed blocks (names follow the 2D GoogLeNet layout).
        self.features.add(_make_Mixed_3a(192, 32, 'Mixed_3a_',
                                         norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_3b(256, 64, 'Mixed_3b_',
                                         norm_layer, norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(2, 2, 2),
                                       padding=(1, 1, 1)))
        self.features.add(_make_Mixed_4a(480, 64, 'Mixed_4a_',
                                         norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4b(512, 64, 'Mixed_4b_',
                                         norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4c(512, 64, 'Mixed_4c_',
                                         norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4d(512, 64, 'Mixed_4d_',
                                         norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_4e(528, 128, 'Mixed_4e_',
                                         norm_layer, norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=2, strides=(2, 2, 2)))
        self.features.add(_make_Mixed_5a(832, 128, 'Mixed_5a_',
                                         norm_layer, norm_kwargs))
        self.features.add(_make_Mixed_5b(832, 128, 'Mixed_5b_',
                                         norm_layer, norm_kwargs))
        self.features.add(nn.GlobalAvgPool3D())
        # Head: dropout + dense classifier.
        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                               weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.output)
        self.features.initialize(ctx=ctx)
        self.head.initialize(ctx=ctx)
        if pretrained_base:
            # I3D initialization: inflate ImageNet-pretrained 2D weights.
            # Conv kernels are repeated along the new temporal axis and
            # divided by its length so activation scale is preserved;
            # batchnorm parameters copy over unchanged; the dense head
            # stays randomly initialized.
            inceptionv1_2d = googlenet(pretrained=True)
            weights2d = inceptionv1_2d.collect_params()
            weights3d = self.collect_params()
            assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be same.'
            # Pair 2D and 3D parameters by declaration order — this relies
            # on both networks declaring layers in the same order.
            dict2d = {}
            for key_id, key_name in enumerate(weights2d.keys()):
                dict2d[key_id] = key_name
            dict3d = {}
            for key_id, key_name in enumerate(weights3d.keys()):
                dict3d[key_id] = key_name
            dict_transform = {}
            for key_id, key_name in dict3d.items():
                dict_transform[dict2d[key_id]] = key_name
            cnt = 0
            for key2d, key3d in dict_transform.items():
                if 'conv' in key3d:
                    temporal_dim = weights3d[key3d].shape[2]
                    temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                    inflated_2d = nd.broadcast_to(temporal_2d, shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                    assert inflated_2d.shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(inflated_2d)
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'batchnorm' in key3d:
                    assert weights2d[key2d].shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(weights2d[key2d].data())
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'dense' in key3d:
                    cnt += 1
                    print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)
            assert cnt == len(weights2d.keys()), 'Not all parameters have been ported, check the initialization.'
# --- data: custom dataset splits (readingDataset defined elsewhere) ---
train_data = readingDataset('train')
valid_data = readingDataset('test')
# Peek at a single sample to sanity-check input/label shapes.
for a, b in train_data:
    print(a.shape)
    print(b.shape)
    break

# --- model: two Conv3D/pool stages + MLP head over 4 classes ---
# Kernels and pool strides are (5,5,1)/(2,2,1): the last axis is left
# untouched (presumably a thin depth/time axis — confirm against the
# dataset's sample shape printed above).
net = nn.Sequential()
with net.name_scope():
    net.add(
        nn.Conv3D(channels=32, kernel_size=(5, 5, 1), activation='relu'),
        nn.MaxPool3D(pool_size=2, strides=(2, 2, 1)),
        nn.Conv3D(channels=64, kernel_size=(5, 5, 1), activation='relu'),
        nn.MaxPool3D(pool_size=2, strides=(2, 2, 1)),
        # nn.Flatten(),
        nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(4)
    )
net.initialize(init=init.Xavier())
print(net)

# --- training setup: softmax CE loss, plain SGD at lr 0.1 ---
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
num_epoch = 10
# Training loop (body continues beyond this chunk).
for epoch in range(num_epoch):
def __init__(self, nclass=1000, pretrained=False, pretrained_base=True,
             num_segments=1, num_crop=1, feat_ext=False, dropout_ratio=0.5,
             init_std=0.01, partial_bn=False, ctx=None,
             norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """I3D built on Inception-V3, with optional 2D->3D weight inflation
    from an ImageNet-pretrained Inception-V3.

    Parameters
    ----------
    nclass : number of classes for the dense head.
    pretrained : if True, inflation is skipped (full 3D weights are
        presumably loaded elsewhere — confirm in the factory function).
    pretrained_base : inflate 2D Inception-V3 weights into the 3D convs.
    feat_ext : stored flag (feature-extraction mode — confirm in forward).
    partial_bn : freeze BN statistics for layers after the stem conv.
    """
    super(I3D_InceptionV3, self).__init__(**kwargs)
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.feat_dim = 2048
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.feat_ext = feat_ext
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        # Stem: temporal padding 1 keeps time length; spatial behavior
        # follows the 2D Inception-V3 stem (stride 2, no spatial pad).
        self.features.add(_make_basic_conv(in_channels=3, channels=32,
                                           kernel_size=3, strides=2,
                                           padding=(1, 0, 0),
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        if partial_bn:
            # Freeze BN statistics for everything after the first conv.
            if norm_kwargs is not None:
                norm_kwargs['use_global_stats'] = True
            else:
                norm_kwargs = {}
                norm_kwargs['use_global_stats'] = True
        self.features.add(_make_basic_conv(in_channels=32, channels=32,
                                           kernel_size=3, padding=(1, 0, 0),
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=32, channels=64,
                                           kernel_size=3, padding=1,
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2),
                                       padding=(1, 0, 0)))
        self.features.add(_make_basic_conv(in_channels=64, channels=80,
                                           kernel_size=1,
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(_make_basic_conv(in_channels=80, channels=192,
                                           kernel_size=3, padding=(1, 0, 0),
                                           norm_layer=norm_layer,
                                           norm_kwargs=norm_kwargs))
        self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2),
                                       padding=(1, 0, 0)))
        # Inception stages A, B, C, D, E (names follow the 2D model).
        self.features.add(_make_A(192, 32, 'A1_', norm_layer, norm_kwargs))
        self.features.add(_make_A(256, 64, 'A2_', norm_layer, norm_kwargs))
        self.features.add(_make_A(288, 64, 'A3_', norm_layer, norm_kwargs))
        self.features.add(_make_B('B_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 128, 'C1_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C2_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C3_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 192, 'C4_', norm_layer, norm_kwargs))
        self.features.add(_make_D('D_', norm_layer, norm_kwargs))
        self.features.add(_make_E(1280, 'E1_', norm_layer, norm_kwargs))
        self.features.add(_make_E(2048, 'E2_', norm_layer, norm_kwargs))
        self.features.add(nn.GlobalAvgPool3D())
        # Head: dropout + dense classifier.
        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.output = nn.Dense(units=nclass, in_units=self.feat_dim,
                               weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.output)
        self.features.initialize(ctx=ctx)
        self.head.initialize(ctx=ctx)
        if pretrained_base and not pretrained:
            # I3D initialization: conv kernels are repeated along the new
            # temporal axis and divided by its length (scale-preserving);
            # batchnorm params copy unchanged; the dense head stays random.
            inceptionv3_2d = inception_v3(pretrained=True)
            weights2d = inceptionv3_2d.collect_params()
            weights3d = self.collect_params()
            assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be same.'
            # Pair 2D and 3D parameters by declaration order — relies on
            # both networks declaring layers in the same order.
            dict2d = {}
            for key_id, key_name in enumerate(weights2d.keys()):
                dict2d[key_id] = key_name
            dict3d = {}
            for key_id, key_name in enumerate(weights3d.keys()):
                dict3d[key_id] = key_name
            dict_transform = {}
            for key_id, key_name in dict3d.items():
                dict_transform[dict2d[key_id]] = key_name
            cnt = 0
            for key2d, key3d in dict_transform.items():
                if 'conv' in key3d:
                    temporal_dim = weights3d[key3d].shape[2]
                    temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                    inflated_2d = nd.broadcast_to(temporal_2d, shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                    assert inflated_2d.shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(inflated_2d)
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'batchnorm' in key3d:
                    assert weights2d[key2d].shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(weights2d[key2d].data())
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'dense' in key3d:
                    cnt += 1
                    print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)
            assert cnt == len(weights2d.keys()), 'Not all parameters have been ported, check the initialization.'
def __init__(self, nclass, depth, num_stages=4, pretrained_base=True,
             num_segments=1, spatial_strides=(1, 2, 2, 2),
             temporal_strides=(1, 1, 1, 1), dilations=(1, 1, 1, 1),
             out_indices=(0, 1, 2, 3), conv1_kernel_t=5, conv1_stride_t=2,
             pool1_kernel_t=1, pool1_stride_t=2, inflate_freq=(1, 1, 1, 1),
             inflate_stride=(1, 1, 1, 1), inflate_style='3x1x1',
             nonlocal_stages=(-1, ), nonlocal_freq=(0, 1, 1, 0),
             nonlocal_cfg=None, bn_eval=True, bn_frozen=False,
             partial_bn=False, frozen_stages=-1, dropout_ratio=0.5,
             init_std=0.01, norm_layer=BatchNorm, norm_kwargs=None,
             ctx=None, **kwargs):
    """I3D on a ResNet-V1 backbone: inflated stem conv, four residual
    stages built by ``make_res_layer`` (with per-stage inflation and
    optional non-local blocks), and a pooled dense classifier.

    Parameters (selected)
    ---------------------
    depth : ResNet depth; must be a key of ``self.arch_settings``.
    conv1_kernel_t / conv1_stride_t : temporal kernel and stride of the
        stem conv; padding is derived as (kernel_t - 1) // 2.
    pool1_kernel_t / pool1_stride_t : temporal shape of the stem pool.
    inflate_freq / inflate_style : which blocks get 3D kernels and how.
    nonlocal_stages / nonlocal_freq / nonlocal_cfg : where non-local
        blocks are inserted.
    """
    super(I3D_ResNetV1, self).__init__()
    if depth not in self.arch_settings:
        raise KeyError('invalid depth {} for resnet'.format(depth))
    self.nclass = nclass
    self.depth = depth
    self.num_stages = num_stages
    self.pretrained_base = pretrained_base
    self.num_segments = num_segments
    self.spatial_strides = spatial_strides
    self.temporal_strides = temporal_strides
    self.dilations = dilations
    # All per-stage configuration tuples must match the stage count.
    assert len(spatial_strides) == len(temporal_strides) == len(
        dilations) == num_stages
    self.out_indices = out_indices
    assert max(out_indices) < num_stages
    # Scalars broadcast to one entry per stage.
    self.inflate_freqs = inflate_freq if not isinstance(
        inflate_freq, int) else (inflate_freq, ) * num_stages
    self.inflate_style = inflate_style
    self.nonlocal_stages = nonlocal_stages
    self.nonlocal_freqs = nonlocal_freq if not isinstance(
        nonlocal_freq, int) else (nonlocal_freq, ) * num_stages
    self.nonlocal_cfg = nonlocal_cfg
    self.bn_eval = bn_eval
    self.bn_frozen = bn_frozen
    self.partial_bn = partial_bn
    self.frozen_stages = frozen_stages
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.block, stage_blocks = self.arch_settings[depth]
    self.stage_blocks = stage_blocks[:num_stages]
    self.inplanes = 64
    # Stem: inflated conv (temporal kernel conv1_kernel_t) + BN/ReLU/pool.
    self.first_stage = nn.HybridSequential(prefix='')
    self.first_stage.add(
        nn.Conv3D(in_channels=3, channels=64,
                  kernel_size=(conv1_kernel_t, 7, 7),
                  strides=(conv1_stride_t, 2, 2),
                  padding=((conv1_kernel_t - 1) // 2, 3, 3),
                  use_bias=False))
    self.first_stage.add(
        norm_layer(in_channels=64,
                   **({} if norm_kwargs is None else norm_kwargs)))
    self.first_stage.add(nn.Activation('relu'))
    self.first_stage.add(
        nn.MaxPool3D(pool_size=(pool1_kernel_t, 3, 3),
                     strides=(pool1_stride_t, 2, 2),
                     padding=(pool1_kernel_t // 2, 1, 1)))
    # Temporal-only pool, halves the time dimension.
    self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1),
                              padding=(0, 0, 0))
    # Residual stages: width doubles each stage (64, 128, 256, 512).
    self.res_layers = nn.HybridSequential(prefix='')
    for i, num_blocks in enumerate(self.stage_blocks):
        spatial_stride = spatial_strides[i]
        temporal_stride = temporal_strides[i]
        dilation = dilations[i]
        planes = 64 * 2**i
        layer_name = 'layer{}_'.format(i + 1)
        res_layer = make_res_layer(self.block,
                                   self.inplanes,
                                   planes,
                                   num_blocks,
                                   spatial_stride=spatial_stride,
                                   temporal_stride=temporal_stride,
                                   dilation=dilation,
                                   inflate_freq=self.inflate_freqs[i],
                                   inflate_style=self.inflate_style,
                                   nonlocal_freq=self.nonlocal_freqs[i],
                                   nonlocal_cfg=self.nonlocal_cfg if i in self.nonlocal_stages else None,
                                   norm_layer=norm_layer,
                                   norm_kwargs=norm_kwargs,
                                   layer_name=layer_name)
        self.inplanes = planes * self.block.expansion
        self.res_layers.add(res_layer)
    self.feat_dim = self.block.expansion * 64 * 2**(
        len(self.stage_blocks) - 1)
    # We use ``GlobalAvgPool3D`` here for simplicity. Otherwise the input size must be fixed.
    # You can also use ``AvgPool3D`` and specify the arguments on your own, e.g.
    # self.st_avg = nn.AvgPool3D(pool_size=(4, 7, 7), strides=1, padding=0)
    # ``AvgPool3D`` is 10% faster, but ``GlobalAvgPool3D`` makes the code cleaner.
    self.st_avg = nn.GlobalAvgPool3D()
    # Head: dropout + dense classifier.
    self.head = nn.HybridSequential(prefix='')
    self.head.add(nn.Dropout(rate=self.dropout_ratio))
    self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                       weight_initializer=init.Normal(sigma=self.init_std))
    self.head.add(self.fc)
    self.init_weights()
def __init__(self, classes=4, dropout_keep_prob=0.5, **kwargs):
    """Inception-v1 I3D network (port of the TensorFlow reference model).

    Parameters
    ----------
    classes : int
        Number of output classes. NOTE(review): the original Kinetics model
        has 400 classes; the default here is 4 — confirm intended default.
    dropout_keep_prob : float
        Probability of *keeping* a unit, following the TensorFlow
        convention implied by the name. Converted to a drop rate below,
        since Gluon's ``nn.Dropout(rate)`` takes the *drop* probability.
    """
    super(InceptionI3d, self).__init__(**kwargs)
    self._num_classes = classes
    self.dropout_keep_prob = dropout_keep_prob

    # this is the main classifier
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        # the input shape is `batch_size` x `num_frames` x 224 x 224 x `num_channels` in tf code
        # but gluon is NCDHW
        # input shape is 1, 3, 79, 224, 224
        self.features.add(
            _make_unit3d(channels=64,
                         kernel_size=(7, 7, 7),
                         strides=(2, 2, 2)))
        # shape is (1, 64, 37, 109, 109)
        self.features.add(
            nn.MaxPool3D(pool_size=(1, 3, 3),
                         strides=(1, 2, 2),
                         padding=(0, 55, 55))
        )  # here should be 'same' padding; hard code for now.
        # shape is (1, 64, 37, 109, 109)
        self.features.add(_make_unit3d(channels=64, kernel_size=(1, 1, 1)))
        # shape (1, 64, 37, 109, 109)
        self.features.add(_make_unit3d(channels=192, kernel_size=(3, 3, 3)))
        # shape (1, 192, 35, 107, 107)
        self.features.add(
            nn.MaxPool3D(pool_size=(1, 3, 3),
                         strides=(1, 2, 2),
                         padding=(0, 54, 54)))  # padding same
        # shape (1, 192, 35, 107, 107)
        self.features.add(_make_mixed_3b('mixed_3b'))
        self.features.add(_make_mixed_3c('mixed_3c'))
        # (1, 480, 35, 107, 107)
        self.features.add(
            nn.MaxPool3D(pool_size=(3, 3, 3),
                         strides=(2, 2, 2),
                         padding=(18, 54, 54)))  # padding is same here
        self.features.add(_make_mixed_4b('mixed_4b'))
        # self.features.add(_make_mixed_4c('mixed_4c'))
        self.features.add(_make_mixed_4d('mixed_4d'))
        self.features.add(_make_mixed_4e('mixed_4e'))
        self.features.add(_make_mixed_4f('mixed_4f'))
        # (1, 384, 35, 107, 107)
        self.features.add(
            nn.MaxPool3D(pool_size=(2, 2, 2),
                         strides=(2, 2, 2),
                         padding=(18, 54, 54)))
        self.features.add(_make_mixed_5b('mixed_5b'))
        self.features.add(_make_mixed_5c('mixed_5c'))
        self.features.add(nn.AvgPool3D(pool_size=(2, 7, 7)))
        # BUGFIX: ``dropout_keep_prob`` is a KEEP probability, but Gluon's
        # ``nn.Dropout(rate)`` expects the DROP probability. Passing it
        # through unchanged inverted the meaning for any value other than
        # the 0.5 default (e.g. keep_prob=0.8 would have dropped 80%).
        self.features.add(nn.Dropout(1.0 - self.dropout_keep_prob))
        self.features.add(
            _make_unit3d(channels=self._num_classes,
                         kernel_size=(1, 1, 1)))
        # logits/main classifier outputs endpoint
        self.output = nn.HybridSequential(prefix='')
        self.output.add(nn.Flatten())
        self.output.add(nn.Dense(self._num_classes))
def __init__(self, nclass, dropout_ratio=0.5, num_segments=1, num_crop=1, feat_ext=False, init_std=0.001, ctx=None, **kwargs): super(C3D, self).__init__() self.num_segments = num_segments self.num_crop = num_crop self.feat_ext = feat_ext self.feat_dim = 8192 with self.name_scope(): self.conv1 = nn.Conv3D(in_channels=3, channels=64, kernel_size=(3, 3, 3), padding=(1, 1, 1)) self.pool1 = nn.MaxPool3D(pool_size=(1, 2, 2), strides=(1, 2, 2)) self.conv2 = nn.Conv3D(in_channels=64, channels=128, kernel_size=(3, 3, 3), padding=(1, 1, 1)) self.pool2 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2)) self.conv3a = nn.Conv3D(in_channels=128, channels=256, kernel_size=(3, 3, 3), padding=(1, 1, 1)) self.conv3b = nn.Conv3D(in_channels=256, channels=256, kernel_size=(3, 3, 3), padding=(1, 1, 1)) self.pool3 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2)) self.conv4a = nn.Conv3D(in_channels=256, channels=512, kernel_size=(3, 3, 3), padding=(1, 1, 1)) self.conv4b = nn.Conv3D(in_channels=512, channels=512, kernel_size=(3, 3, 3), padding=(1, 1, 1)) self.pool4 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2)) self.conv5a = nn.Conv3D(in_channels=512, channels=512, kernel_size=(3, 3, 3), padding=(1, 1, 1)) self.conv5b = nn.Conv3D(in_channels=512, channels=512, kernel_size=(3, 3, 3), padding=(1, 1, 1)) self.pool5 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2), padding=(0, 1, 1)) self.fc6 = nn.Dense(in_units=8192, units=4096, weight_initializer=init.Normal(sigma=init_std)) self.fc7 = nn.Dense(in_units=4096, units=4096, weight_initializer=init.Normal(sigma=init_std)) self.fc8 = nn.Dense(in_units=4096, units=nclass, weight_initializer=init.Normal(sigma=init_std)) self.dropout = nn.Dropout(rate=dropout_ratio) self.relu = nn.Activation('relu')
def __init__(self, in_channels=1024, nonlocal_type="gaussian", dim=3, embed=True, embed_dim=None, sub_sample=True, use_bn=True, norm_layer=BatchNorm, norm_kwargs=None, ctx=None, **kwargs):
    """Non-local block: builds the theta/phi/g embeddings, the pairwise
    projection (for 'concat'), optional sub-sampling pools, and the output
    projection ``W`` (optionally followed by BatchNorm).

    Parameters
    ----------
    in_channels : int
        Channel count of the block's input (and of its output, since ``W``
        projects back to ``in_channels``).
    nonlocal_type : str
        Pairwise function: one of 'gaussian', 'dot', 'concat'.
    dim : int
        2 for 2D (NCHW) inputs, 3 for 3D (NCDHW) inputs; selects
        Conv2D/MaxPool2D vs Conv3D/MaxPool3D variants.
    embed : bool
        Whether to build the 1x1(x1) theta/phi/g embedding convolutions.
    embed_dim : int or None
        Embedding width; defaults to ``in_channels // 2``.
    sub_sample : bool
        If True, phi and g are followed by a spatial max-pool.
    use_bn : bool
        If True, ``W`` is followed by a norm layer with zero-initialized
        gamma (so the block's residual branch starts near zero —
        presumably for identity-like initialization; confirm against the
        forward pass).
    """
    super(NonLocal, self).__init__()
    assert nonlocal_type in ['gaussian', 'dot', 'concat']
    self.nonlocal_type = nonlocal_type
    self.embed = embed
    # Default embedding width is half the input channels.
    self.embed_dim = embed_dim if embed_dim is not None else in_channels // 2
    self.sub_sample = sub_sample
    self.use_bn = use_bn

    with self.name_scope():
        if self.embed:
            if dim == 2:
                # 1x1 embeddings for query (theta), key (phi), value (g).
                self.theta = nn.Conv2D(in_channels=in_channels,
                                       channels=self.embed_dim,
                                       kernel_size=(1, 1),
                                       strides=(1, 1),
                                       padding=(0, 0),
                                       weight_initializer=init.MSRAPrelu())
                self.phi = nn.Conv2D(in_channels=in_channels,
                                     channels=self.embed_dim,
                                     kernel_size=(1, 1),
                                     strides=(1, 1),
                                     padding=(0, 0),
                                     weight_initializer=init.MSRAPrelu())
                self.g = nn.Conv2D(in_channels=in_channels,
                                   channels=self.embed_dim,
                                   kernel_size=(1, 1),
                                   strides=(1, 1),
                                   padding=(0, 0),
                                   weight_initializer=init.MSRAPrelu())
            elif dim == 3:
                # 1x1x1 embeddings — same roles as above, for video input.
                self.theta = nn.Conv3D(in_channels=in_channels,
                                       channels=self.embed_dim,
                                       kernel_size=(1, 1, 1),
                                       strides=(1, 1, 1),
                                       padding=(0, 0, 0),
                                       weight_initializer=init.MSRAPrelu())
                self.phi = nn.Conv3D(in_channels=in_channels,
                                     channels=self.embed_dim,
                                     kernel_size=(1, 1, 1),
                                     strides=(1, 1, 1),
                                     padding=(0, 0, 0),
                                     weight_initializer=init.MSRAPrelu())
                self.g = nn.Conv3D(in_channels=in_channels,
                                   channels=self.embed_dim,
                                   kernel_size=(1, 1, 1),
                                   strides=(1, 1, 1),
                                   padding=(0, 0, 0),
                                   weight_initializer=init.MSRAPrelu())

        if self.nonlocal_type == 'concat':
            # 'concat' scores pairs by projecting [theta, phi] (hence
            # embed_dim * 2 input channels) down to a single channel.
            if dim == 2:
                self.concat_proj = nn.HybridSequential()
                self.concat_proj.add(
                    nn.Conv2D(in_channels=self.embed_dim * 2,
                              channels=1,
                              kernel_size=(1, 1),
                              strides=(1, 1),
                              padding=(0, 0),
                              weight_initializer=init.MSRAPrelu()))
                self.concat_proj.add(nn.Activation('relu'))
            elif dim == 3:
                self.concat_proj = nn.HybridSequential()
                self.concat_proj.add(
                    nn.Conv3D(in_channels=self.embed_dim * 2,
                              channels=1,
                              kernel_size=(1, 1, 1),
                              strides=(1, 1, 1),
                              padding=(0, 0, 0),
                              weight_initializer=init.MSRAPrelu()))
                self.concat_proj.add(nn.Activation('relu'))

        if sub_sample:
            # Sub-sample the key/value paths (phi, g) to cut the cost of
            # the pairwise computation; 3D pooling is spatial-only.
            if dim == 2:
                self.max_pool = nn.MaxPool2D(pool_size=(2, 2))
            elif dim == 3:
                self.max_pool = nn.MaxPool3D(pool_size=(1, 2, 2))
            # NOTE(review): this references self.phi / self.g, which only
            # exist when ``embed`` is True — embed=False with
            # sub_sample=True would raise AttributeError; confirm intended.
            self.sub_phi = nn.HybridSequential()
            self.sub_phi.add(self.phi)
            self.sub_phi.add(self.max_pool)
            self.sub_g = nn.HybridSequential()
            self.sub_g.add(self.g)
            self.sub_g.add(self.max_pool)

        # Output projection back to the input channel count.
        if dim == 2:
            self.W = nn.Conv2D(in_channels=self.embed_dim,
                               channels=in_channels,
                               kernel_size=(1, 1),
                               strides=(1, 1),
                               padding=(0, 0),
                               weight_initializer=init.MSRAPrelu())
        elif dim == 3:
            self.W = nn.Conv3D(in_channels=self.embed_dim,
                               channels=in_channels,
                               kernel_size=(1, 1, 1),
                               strides=(1, 1, 1),
                               padding=(0, 0, 0),
                               weight_initializer=init.MSRAPrelu())
        if use_bn:
            # gamma starts at zero, so W_bn initially outputs zeros.
            self.bn = norm_layer(
                in_channels=in_channels,
                gamma_initializer='zeros',
                **({} if norm_kwargs is None else norm_kwargs))
            self.W_bn = nn.HybridSequential()
            self.W_bn.add(self.W)
            self.W_bn.add(self.bn)
net.add(nn.Flatten()) net.add(nn.Dense(4096, activation='relu')) net.add(nn.Dropout(.5)) # Stage 5 net.add(nn.Dense(4096, activation='relu')) net.add(nn.Dropout(.5)) # Stage 6 net.add(nn.Dense(10)) net_new = gluon.nn.Sequential() with net_new.name_scope(): # Stage 1 net.add(nn.Conv3D( channels=96, kernel_size=(1,11,11), strides=(1,4,4), activation='relu')) net.add(nn.MaxPool3D(pool_size=(1,3,3),strides=(1,2,2))) # Stage 2 net.add(nn.Conv3D( channels=32, kernel_size=(16,5,5), strides=(8,1,1),padding=(0,2,2), activation='relu')) net.add(nn.MaxPool3D(pool_size=(1,3,3),strides=(1,2,2))) # Stage 3 ned.add(nn.Conv3D( channels=)) def transform(data, label): # Resize from 28 x 28 to 224 x 224 data = image.imresize(data, 224, 224) return utils.transform_mnist(data, label) batch_size = 64 train_data, test_data = utils.load_data_fashion_mnist(batch_size, transform) ctx = utils.try_gpu()