def get_R2plus1d(num_class=101,
                 no_bias=0,
                 model_depth=18,
                 final_spatial_kernel=7,
                 final_temporal_kernel=4):
    """Assemble an R(2+1)D-style network as a plain ``nn.Sequential``.

    Layout: a two-convolution stem (1x7x7 followed by 3x1x1, each with
    BatchNorm + ReLU), four stages of ``R3DBlock`` whose depths are read from
    ``BLOCK_CONFIG[model_depth]``, a 3D average pool and a dense classifier.

    Every ``R3DBlock`` receives its own, strictly increasing ``comp_index``.
    Earlier revisions of this function omitted the index for the tail blocks
    of conv_4x and reused one index for the first two blocks of conv_5x; the
    numbering now matches the one used by the ``R2Plus2D`` constructor.

    Args:
        num_class: Number of units of the final ``nn.Dense`` layer.
        no_bias: Accepted for interface compatibility; not used by this
            function.
        model_depth: Key looked up in ``BLOCK_CONFIG`` to obtain the four
            stage depths.
        final_spatial_kernel: Height and width of the final pooling window.
        final_temporal_kernel: Temporal extent of the final pooling window.

    Returns:
        The assembled ``nn.Sequential``. Parameters are not initialised here.
    """
    net = nn.Sequential()
    net.add(
        nn.Conv3D(channels=45, kernel_size=(1, 7, 7), strides=(1, 2, 2), padding=(0, 3, 3)),
        nn.BatchNorm(),
        nn.Activation(activation='relu'),
        nn.Conv3D(channels=64, kernel_size=(3, 1, 1), strides=(1, 1, 1), padding=(1, 0, 0)),
        nn.BatchNorm(),
        nn.Activation(activation='relu'))

    (n1, n2, n3, n4) = BLOCK_CONFIG[model_depth]
    comp_count = 0

    # conv_2x: n1 blocks at constant width, no downsampling.
    for _ in range(n1):
        net.add(R3DBlock(input_filter=64, num_filter=64, comp_index=comp_count))
        comp_count += 1

    # conv_3x .. conv_5x: one downsampling block that changes the width,
    # followed by (depth - 1) blocks at the new width.
    for in_filter, out_filter, depth in ((64, 128, n2), (128, 256, n3), (256, 512, n4)):
        net.add(
            R3DBlock(input_filter=in_filter, num_filter=out_filter,
                     comp_index=comp_count, downsampling=True))
        comp_count += 1
        for _ in range(depth - 1):
            net.add(
                R3DBlock(input_filter=out_filter, num_filter=out_filter,
                         comp_index=comp_count))
            comp_count += 1

    # final layers
    net.add(
        nn.AvgPool3D(pool_size=(final_temporal_kernel, final_spatial_kernel, final_spatial_kernel),
                     strides=(1, 1, 1),
                     padding=(0, 0, 0)))
    net.add(nn.Dense(units=num_class))
    return net
def test_pool():
    """Forward-check the 1D/2D/3D pooling layers and MaxPool2D's ``ceil_mode``."""
    # Each row: (max pool, avg pool, global avg pool) classes, the explicit
    # size-3 window used for the second max-pool variant, and the input shape.
    families = (
        ((nn.MaxPool1D, nn.AvgPool1D, nn.GlobalAvgPool1D), 3, (1, 2, 10)),
        ((nn.MaxPool2D, nn.AvgPool2D, nn.GlobalAvgPool2D), (3, 3), (1, 2, 10, 10)),
        ((nn.MaxPool3D, nn.AvgPool3D, nn.GlobalAvgPool3D), (3, 3, 3), (1, 2, 10, 10, 10)),
    )
    for (max_pool, avg_pool, global_pool), window, in_shape in families:
        candidates = [
            max_pool(),
            max_pool(window),
            max_pool(3, 2),
            avg_pool(),
            avg_pool(count_include_pad=False),
            global_pool(),
        ]
        for candidate in candidates:
            check_layer_forward(candidate, in_shape)

    # test ceil_mode: a 10x10 input pooled with window 3 yields 3x3 when the
    # output size is floored and 4x4 when it is ceiled.
    data = mx.nd.zeros((2, 2, 10, 10))
    for use_ceil, expected_shape in ((False, (2, 2, 3, 3)), (True, (2, 2, 4, 4))):
        pool = nn.MaxPool2D(3, ceil_mode=use_ceil)
        pool.collect_params().initialize()
        assert pool(data).shape == expected_shape
def __init__(self, nclass, input_channel=3, batch_normal=True, dropout_ratio=0.8,
             init_std=0.001, **kwargs):
    """Create the layers of the P3D network.

    Args:
        nclass: Number of units of the output dense layer.
        input_channel: ``in_channels`` of the first convolution.
        batch_normal: Accepted but not used by this constructor.
        dropout_ratio: Stored on the instance as ``self.dropout_ratio``.
        init_std: Sigma of the normal initializer of the output layer.
        **kwargs: Accepted but not forwarded to the parent constructor.
    """
    super(P3D, self).__init__()
    self.nclass = nclass
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.expansion = 1
    width = self.expansion

    def build_stage(*block_specs):
        # One HybridSequential per stage; every spec is the positional
        # argument tuple of a P3D_block.
        stage = nn.HybridSequential()
        for spec in block_specs:
            stage.add(P3D_block(*spec))
        return stage

    with self.name_scope():
        # Stem: 1x7x7 convolution, batch norm, ReLU and spatial-only max pool.
        self.conv1 = nn.Conv3D(in_channels=input_channel,
                               channels=64,
                               kernel_size=(1, 7, 7),
                               strides=(1, 2, 2),
                               padding=(0, 3, 3),
                               use_bias=False)
        self.bn1 = nn.BatchNorm(in_channels=64)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool3D(pool_size=(1, 3, 3),
                                    strides=(1, 2, 2),
                                    padding=(0, 1, 1))

        # Stages: block types keep cycling A -> B -> C across stage borders.
        self.conv2 = build_stage(
            ('A', 64, 64 * width, 2),
            ('B', 64 * width, 64 * width),
            ('C', 64 * width, 64 * width),
        )
        self.conv3 = build_stage(
            ('A', 64 * width, 128 * width, 2),
            ('B', 128 * width, 128 * width),
            ('C', 128 * width, 128 * width),
            ('A', 128 * width, 128 * width),
        )
        self.conv4 = build_stage(
            ('B', 128 * width, 256 * width, 2),
            ('C', 256 * width, 256 * width),
            ('A', 256 * width, 256 * width),
            ('B', 256 * width, 256 * width),
            ('C', 256 * width, 256 * width),
            ('A', 256 * width, 256 * width),
        )
        self.conv5 = build_stage(
            ('B', 256 * width, 512 * width, 2),
            ('C', 512 * width, 512 * width),
            ('A', 512 * width, 512),
        )

        # Head: spatial average pool followed by the classifier.
        self.avg_pool = nn.AvgPool3D(pool_size=(1, 3, 3))
        self.output = nn.Dense(in_units=512,
                               units=nclass,
                               weight_initializer=init.Normal(sigma=init_std))
def _make_branch(use_pool, norm_layer, norm_kwargs, *conv_settings):
    """Build one branch: an optional 3x3x3 pooling layer plus a chain of convs.

    Args:
        use_pool: ``'avg'`` or ``'max'`` to prepend a stride-1 pooling layer;
            any other value adds no pooling.
        norm_layer: Forwarded to ``_make_basic_conv``.
        norm_kwargs: Forwarded to ``_make_basic_conv``.
        *conv_settings: One sequence per convolution, read positionally as
            ``(in_channels, channels, kernel_size, strides, padding)``.
            ``None`` entries are skipped.

    Returns:
        ``nn.HybridSequential`` holding the branch layers.
    """
    branch = nn.HybridSequential(prefix='')
    if use_pool == 'avg':
        branch.add(nn.AvgPool3D(pool_size=3, strides=1, padding=1))
    elif use_pool == 'max':
        branch.add(nn.MaxPool3D(pool_size=3, strides=1, padding=1))

    setting_names = ('in_channels', 'channels', 'kernel_size', 'strides', 'padding')
    for conv_cfg in conv_settings:
        extra = {}
        for position, item in enumerate(conv_cfg):
            if item is None:
                continue
            field = setting_names[position]
            if field == 'in_channels':
                in_channels = item
            elif field == 'channels':
                channels = item
            else:
                extra[field] = item
        # in_channels / channels keep the value of the previous setting when
        # the current one leaves them as None.
        branch.add(_make_basic_conv(in_channels, channels, norm_layer, norm_kwargs, **extra))
    return branch
def __init__(self, feat_size, input_channel=1, power=1, **kwargs):
    """Create a stack of eight Conv3D/BatchNorm/ReLU groups, a pool and a dense layer.

    Args:
        feat_size: Number of units of the final dense layer.
        input_channel: Accepted but not used by this constructor.
        power: Channel multiplier; converted with ``int()`` before use.
        **kwargs: Forwarded to the parent constructor.
    """
    super(Conv3DNet, self).__init__(**kwargs)
    width_scale = int(power)

    # One row per convolution: (base channels, kernel size, stride, padding).
    self.parameters = [
        (8, 5, 1, 2),
        (16, 3, 2, 1),
        (16, 3, 1, 1),
        (32, 3, 2, 1),
        (32, 3, 1, 1),
        (64, 3, 2, 1),
        (64, 3, 1, 1),
        (64, 3, 1, 1),
    ]

    self.block = nn.Sequential()
    for base_channels, kernel, stride, pad in self.parameters:
        conv = nn.Conv3D(channels=base_channels * width_scale,
                         kernel_size=kernel,
                         strides=stride,
                         padding=pad)
        norm = nn.BatchNorm()
        act = nn.Activation('relu')
        self.block.add(conv, norm, act)

    self.avgpool = nn.AvgPool3D(pool_size=(4, 4, 4))
    self.fc = nn.Dense(feat_size)
def __init__(self, layers, feature, arch='', auto=False, norm=False, device=None,
             last=True, flatten=False, reconstruct=False):
    """Create the shared layers of D2 and delegate the rest to ``self.add``.

    Args:
        layers: Stored as ``self.layers``.
        feature, auto, device, last, flatten: Accepted but not used by this
            constructor.
        arch: Stored as ``self.arch``.
        norm: Stored as ``self.norm``.
        reconstruct: Stored as ``self.reconstruct``.
    """
    super(D2, self).__init__()
    self.arch, self.reconstruct = arch, reconstruct
    self.layers, self.norm = layers, norm

    with self.name_scope():
        # Activation blocks, registered in this order under these names.
        for attr_name, act_kind in (('activation', 'relu'),
                                    ('tanh', 'tanh'),
                                    ('sigmoid', 'sigmoid'),
                                    ('relu', 'relu')):
            setattr(self, attr_name, nn.Activation(act_kind))
        #self.norm = nn.BatchNorm(axis = 1)
        self.fc = nn.Dense(1, flatten=False)
        self.dropout = nn.Dropout(0.5)
        self.pool = nn.AvgPool3D([3, 1, 1], [2, 1, 1])

        # Containers initialised empty here, before self.add is called.
        # NOTE(review): the source was flattened; these lines are assumed to
        # sit inside the name scope -- confirm.
        self.encoder, self.decoder = [], []
        self.enorm, self.dnorm, self.rnorm = [], [], []
        self.add([8, 16, 32])
def _make_3D(prefix):
    """Build the 3D branch ("branch 2") and return it.

    The first two layers are the same as the first two layers of the
    three-convolution branch (original author's note). They are followed by a
    reshape/transpose, a 3x3x3 convolution, five BlockV1/BlockV2 blocks, an
    average pool, a reshape to ``(-1, 512)`` and dropout.

    Args:
        prefix: Prefix handed to the ``MyHybridSequential`` container.

    Returns:
        The populated ``MyHybridSequential``.
    """
    branch_2 = MyHybridSequential(prefix=prefix)

    def add_v1_group(width):
        # BlockV1 followed by batch norm and ReLU.
        branch_2.add(BlockV1(width))
        branch_2.add(nn.BatchNorm(epsilon=0.0001))
        branch_2.add(nn.Activation('relu'))

    with branch_2.name_scope():
        branch_2.add(_make_branch(None, (64, 1, None, None), (96, 3, None, 1)))
        branch_2.add(MyReshape(shape=(-1, 16, 96, 28, 28)))
        branch_2.add(MyTranspose(axes=(0, 2, 1, 3, 4)))
        branch_2.add(
            nn.Conv3D(channels=128,
                      kernel_size=(3, 3, 3),
                      strides=(1, 1, 1),
                      padding=(1, 1, 1)))

        # Block1
        add_v1_group(128)
        # Block2 + Block3, then Block4 + Block5
        for width in (256, 512):
            branch_2.add(BlockV2(width))
            add_v1_group(width)

        branch_2.add(nn.AvgPool3D(pool_size=(4, 7, 7), strides=(1, 1, 1)))
        branch_2.add(MyReshape(shape=(-1, 512)))
        branch_2.add(nn.Dropout(0.5))
    return branch_2
def __init__(self, nclass, input_channel=3, batch_normal=True, dropout_ratio=0.8,
             init_std=0.001, **kwargs):
    """Create the layers of the Res21D_34 network.

    Args:
        nclass: Number of units of the output dense layer.
        input_channel: ``in_channels`` of the first convolution.
        batch_normal: Accepted but not used by this constructor.
        dropout_ratio: Stored on the instance as ``self.dropout_ratio``.
        init_std: Sigma of the normal initializer of the output layer.
        **kwargs: Accepted but not forwarded to the parent constructor.
    """
    super(Res21D_34, self).__init__()
    self.nclass = nclass
    self.new_length = 8
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    # self.config_3d_layer = [2,2,2,2]
    # self.config_3d_temporal_stride = [1,2,2,2]

    def build_stage(in_c, out_c, tail_blocks, **head_kwargs):
        # Head block changes width/stride; the tail keeps the new width.
        stage = nn.HybridSequential()
        stage.add(Res21D_Block(in_channel=in_c, out_channel=out_c, **head_kwargs))
        for _ in range(tail_blocks):
            stage.add(Res21D_Block(out_c, out_c))
        return stage

    with self.name_scope():
        self.conv1 = nn.Conv3D(in_channels=input_channel,
                               channels=64,
                               kernel_size=(3, 7, 7),
                               strides=(1, 2, 2),
                               padding=(1, 3, 3),
                               weight_initializer=init.Xavier(),
                               bias_initializer='zero')

        # Stage depths are 3, 4, 6 and 3 blocks.
        self.conv2 = build_stage(64, 64, 2, spatial_stride=2)
        self.conv3 = build_stage(64, 128, 3, spatial_stride=2, temporal_stride=2)
        self.conv4 = build_stage(128, 256, 5, spatial_stride=2, temporal_stride=2)
        self.conv5 = build_stage(256, 512, 2, spatial_stride=2, temporal_stride=2)

        self.avg_pool = nn.AvgPool3D(pool_size=(1, 4, 4))
        self.output = nn.Dense(in_units=512,
                               units=nclass,
                               weight_initializer=init.Normal(sigma=init_std))
def __init__(self, num_class, model_depth, final_spatial_kernel=7, final_temporal_kernel=4, with_bias=False):
    """Create the stem, the four R3DBlock stages and the classifier head.

    While the stages are built, ``self.comp_count`` is incremented once per
    ``R3DBlock`` and the return values of ``self.add_comp_count_index`` are
    collected in ``self.conv2_name`` .. ``self.conv5_name``.

    Args:
        num_class: Number of units of the output dense layer.
        model_depth: Key looked up in ``BLOCK_CONFIG`` for the stage depths.
        final_spatial_kernel: Height and width of the final pooling window.
        final_temporal_kernel: Temporal extent of the final pooling window.
        with_bias: Passed as ``use_bias`` to the stem convolutions and to
            every ``R3DBlock``.
    """
    super(R2Plus2D, self).__init__()
    # Running index handed to each R3DBlock as comp_index.
    self.comp_count = 0
    # Stem: 1x7x7 convolution followed by 3x1x1 convolution, each with BN + ReLU.
    self.base = nn.HybridSequential(prefix='base_')
    with self.base.name_scope():
        self.base.add(
            nn.Conv3D(channels=45, kernel_size=(1, 7, 7), strides=(1, 2, 2), padding=(0, 3, 3), use_bias=with_bias),
            nn.BatchNorm(),
            nn.Activation(activation='relu'),
            nn.Conv3D(channels=64, kernel_size=(3, 1, 1), strides=(1, 1, 1), padding=(1, 0, 0), use_bias=with_bias),
            nn.BatchNorm(),
            nn.Activation(activation='relu'))
    # presumably a list of parameter names for the stem -- helper defined elsewhere; verify
    self.base_name = self.set_base_name()
    (n2, n3, n4, n5) = BLOCK_CONFIG[model_depth]

    # conv2: n2 blocks, 64 -> 64, no downsampling.
    self.conv2_name = []
    self.conv2 = nn.HybridSequential(prefix='conv2_')
    with self.conv2.name_scope():
        for _ in range(n2):
            # Only this stage passes `prefix` to add_comp_count_index.
            self.conv2_name.extend(
                self.add_comp_count_index(change_channels=False, comp_index=self.comp_count, prefix=self.conv2.prefix))
            self.conv2.add(
                R3DBlock(input_filter=64, num_filter=64, comp_index=self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # conv3: one downsampling block 64 -> 128, then n3 - 1 blocks at 128.
    self.conv3_name = []
    self.conv3 = nn.HybridSequential(prefix='conv3_')
    with self.conv3.name_scope():
        # Debug output emitted on every construction.
        print("this in conv3 comp_count is ", self.comp_count)
        self.conv3_name.extend(
            self.add_comp_count_index(change_channels=True, downsampling=True, comp_index=self.comp_count))
        self.conv3.add(
            R3DBlock(input_filter=64, num_filter=128, comp_index=self.comp_count, downsampling=True, use_bias=with_bias))
        self.comp_count += 1
        for _ in range(n3 - 1):
            self.conv3_name.extend(
                self.add_comp_count_index(change_channels=False, downsampling=False, comp_index=self.comp_count))
            self.conv3.add(
                R3DBlock(input_filter=128, num_filter=128, comp_index=self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # conv4: one downsampling block 128 -> 256, then n4 - 1 blocks at 256.
    self.conv4_name = []
    self.conv4 = nn.HybridSequential(prefix='conv4_')
    with self.conv4.name_scope():
        self.conv4_name.extend(
            self.add_comp_count_index(change_channels=True, downsampling=True, comp_index=self.comp_count))
        self.conv4.add(
            R3DBlock(128, 256, comp_index=self.comp_count, downsampling=True, use_bias=with_bias))
        self.comp_count += 1
        for _ in range(n4 - 1):
            self.conv4_name.extend(
                self.add_comp_count_index(change_channels=False, downsampling=False, comp_index=self.comp_count))
            self.conv4.add(
                R3DBlock(256, 256, comp_index=self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # conv5: one downsampling block 256 -> 512, then n5 - 1 blocks at 512.
    self.conv5_name = []
    self.conv5 = nn.HybridSequential(prefix='conv5_')
    with self.conv5.name_scope():
        self.conv5_name.extend(
            self.add_comp_count_index(change_channels=True, downsampling=True, comp_index=self.comp_count))
        self.conv5.add(
            R3DBlock(256, 512, comp_index=self.comp_count, downsampling=True, use_bias=with_bias))
        self.comp_count += 1
        for _ in range(n5 - 1):
            # Tail blocks here rely on add_comp_count_index's defaults for
            # change_channels / downsampling.
            self.conv5_name.extend(
                self.add_comp_count_index(comp_index=self.comp_count))
            self.conv5.add(
                R3DBlock(512, 512, self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # final output of conv5 is [512,t/8,7,7] (original author's note)
    # NOTE(review): the source was flattened; the head below is assumed to sit
    # outside the conv5 name scope -- confirm.
    self.avg = nn.AvgPool3D(pool_size=(final_temporal_kernel, final_spatial_kernel, final_spatial_kernel),
                            strides=(1, 1, 1),
                            padding=(0, 0, 0))
    # The classifier applies a sigmoid activation to its output.
    self.output = nn.Dense(units=num_class, activation='sigmoid', use_bias=True)
    self.dense0_name = ['final_fc_weight', 'final_fc_bias']
def __init__(self, classes=4, dropout_keep_prob=0.5, **kwargs):
    """Create the feature extractor and classifier of the Inception I3D network.

    Args:
        classes: Number of output classes (default 4). It sets both the
            channel count of the final 1x1x1 unit and the units of the dense
            output layer.
        dropout_keep_prob: Probability of *keeping* a unit. Gluon's
            ``nn.Dropout`` expects the fraction to *drop*, so the layer is
            built with ``1.0 - dropout_keep_prob``. The default of 0.5 gives
            the same layer as before this conversion was added.
        **kwargs: Forwarded to the parent constructor.
    """
    super(InceptionI3d, self).__init__(**kwargs)
    self._num_classes = classes
    self.dropout_keep_prob = dropout_keep_prob
    # nn.Dropout takes the drop rate, not the keep probability.
    drop_rate = 1.0 - self.dropout_keep_prob

    # this is the main classifier
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        # The input shape is `batch_size` x `num_frames` x 224 x 224 x
        # `num_channels` in the tf code, but gluon is NCDHW.
        # The shape notes below are the original author's, for an input of
        # (1, 3, 79, 224, 224); they have not been re-verified.
        self.features.add(
            _make_unit3d(channels=64, kernel_size=(7, 7, 7), strides=(2, 2, 2)))
        # shape is (1, 64, 37, 109, 109)
        # here should be 'same' padding; hard code for now.
        self.features.add(
            nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 55, 55)))
        # shape is (1, 64, 37, 109, 109)
        self.features.add(_make_unit3d(channels=64, kernel_size=(1, 1, 1)))
        # shape (1, 64, 37, 109, 109)
        self.features.add(_make_unit3d(channels=192, kernel_size=(3, 3, 3)))
        # shape (1, 192, 35, 107, 107)
        # padding same
        self.features.add(
            nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 54, 54)))
        # shape (1, 192, 35, 107, 107)
        self.features.add(_make_mixed_3b('mixed_3b'))
        self.features.add(_make_mixed_3c('mixed_3c'))
        # (1, 480, 35, 107, 107)
        # padding is same here
        self.features.add(
            nn.MaxPool3D(pool_size=(3, 3, 3), strides=(2, 2, 2), padding=(18, 54, 54)))
        # mixed_4c is not added to the network.
        self.features.add(_make_mixed_4b('mixed_4b'))
        self.features.add(_make_mixed_4d('mixed_4d'))
        self.features.add(_make_mixed_4e('mixed_4e'))
        self.features.add(_make_mixed_4f('mixed_4f'))
        # (1, 384, 35, 107, 107)
        self.features.add(
            nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2), padding=(18, 54, 54)))
        self.features.add(_make_mixed_5b('mixed_5b'))
        self.features.add(_make_mixed_5c('mixed_5c'))
        self.features.add(nn.AvgPool3D(pool_size=(2, 7, 7)))
        self.features.add(nn.Dropout(drop_rate))
        self.features.add(
            _make_unit3d(channels=self._num_classes, kernel_size=(1, 1, 1)))

        # logits/main classifier outputs endpoint
        self.output = nn.HybridSequential(prefix='')
        self.output.add(nn.Flatten())
        self.output.add(nn.Dense(self._num_classes))
def __init__(self, nclass, base_model='resnet18_v1b', pretrained_base=True, num_segments=8,
             num_temporal=1, ifTSN=True, input_channel=3, batch_normal=True,
             dropout_ratio=0.8, init_std=0.001, **kwargs):
    """Create the ECO network: a 2D backbone followed by a 3D residual head.

    Args:
        nclass: Number of output classes.
        base_model: Name passed to ``get_model`` to obtain the 2D backbone.
        pretrained_base: Passed as ``pretrained`` to ``get_model``; when true
            the 3D head and the output layers are initialised here.
        num_segments: Stored as ``self.num_segments``; also decides whether
            ``num_temporal`` is honoured.
        num_temporal: Temporal stride of the first 3D block. Only used when
            ``num_segments`` is 8, 16 or 32; otherwise 1 is used.
        ifTSN: When equal to ``True`` a single dense layer on 512 features is
            used; otherwise a two-layer head on ``self.feat_dim_3d`` features.
        input_channel: Accepted but not used by this constructor.
        batch_normal: Accepted but not used by this constructor.
        dropout_ratio: Rate of the dropout layer.
        init_std: Sigma of the normal initializer of the dense layers.
        **kwargs: Accepted but not forwarded to the parent constructor.
    """
    super(ECO, self).__init__()
    self.nclass = nclass
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.ifTSN = ifTSN
    self.input_shape = 224
    # e.g. 'resnet18_v1b', 'resnet18_v2', 'resnet18_v1b_kinetics400',
    # 'resnet18_v1b_k400_ucf101'
    self.base_model = base_model

    # resnet50 / 101 / 152 use an expansion of 4, everything else 1.
    # The first matching marker wins.
    expansion_by_marker = (
        ('resnet18_v1b', 1),
        ('resnet34_v1b', 1),
        ('resnet50_v1b', 4),
        ('resnet101_v1b', 4),
        ('resnet152_v1b', 4),
    )
    self.expansion = next(
        (factor for marker, factor in expansion_by_marker if marker in self.base_model), 1)

    # Channel dimension coming out of the 2D convolutions.
    self.feat_dim_2d = 128 * self.expansion

    # num_temporal defaults to 1: in the paper the temporal dimension is not
    # reduced at the start. It is only honoured for 8, 16 or 32 segments.
    self.num_temporal = num_temporal if self.num_segments in (8, 16, 32) else 1

    # Exact comparison kept on purpose so non-bool inputs behave as before.
    use_tsn_head = self.ifTSN == True  # noqa: E712

    # Dimension of the input to the fully connected head.
    if use_tsn_head:
        self.feat_dim_3d = 512
    else:
        # Flatten
        tmppara = self.num_segments // 4
        tmppara = tmppara // (self.num_temporal if tmppara > 1 else 1)
        self.feat_dim_3d = 512 * tmppara

    pretrained_model = get_model(self.base_model, pretrained=pretrained_base)

    with self.name_scope():
        # x = nd.zeros(shape=(7x8,3,224,224))
        # 2D feature
        if self.base_model == 'resnet18_v2':
            self.feature2d = pretrained_model.features
        else:
            # e.g. 'resnet18_v1b': reuse the backbone up to layer2. Each part
            # is attached once (conv1 used to be assigned twice).
            for part in ('conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2'):
                setattr(self, part, getattr(pretrained_model, part))

        # 3D feature
        self.features_3d = nn.HybridSequential(prefix='')
        block_plan = (
            # (in_channel, out_channel, spatial_stride, temporal_stride)
            (self.feat_dim_2d, 128, 1, self.num_temporal),  # conv3_x
            (128, 128, 1, 1),
            (128, 256, 2, 2),  # conv4_x
            (256, 256, 1, 1),
            (256, 512, 2, 2),  # conv5_x
            (512, 512, 1, 1),
        )
        for in_c, out_c, s_stride, t_stride in block_plan:
            self.features_3d.add(
                BasicBlock(in_channel=in_c,
                           out_channel=out_c,
                           spatial_stride=s_stride,
                           temporal_stride=t_stride))
        self.features_3d.add(nn.AvgPool3D(pool_size=(1, 7, 7)))

        self.dropout = nn.Dropout(rate=self.dropout_ratio)

        self.output = nn.HybridSequential(prefix='')
        if use_tsn_head:
            self.output.add(
                nn.Dense(units=self.nclass,
                         in_units=512,
                         weight_initializer=init.Normal(sigma=self.init_std)))
        else:
            self.output.add(
                nn.Dense(units=512,
                         in_units=self.feat_dim_3d,
                         weight_initializer=init.Normal(sigma=self.init_std)),
                nn.Dense(units=self.nclass,
                         in_units=512,
                         weight_initializer=init.Normal(sigma=self.init_std)))

        # init: only the parts added here are initialised, and only when the
        # 2D backbone was loaded with pretrained weights.
        if pretrained_base:
            self.features_3d.initialize(init.MSRAPrelu())
            self.output.initialize(init.MSRAPrelu())
def __init__(self, nclass, input_channel=3, dropout_ratio=0.5, init_std=0.001, **kwargs):
    """Create the layers of the Res3D network.

    Args:
        nclass: Number of units of the output dense layer.
        input_channel: ``in_channels`` of the first convolution.
        dropout_ratio: Stored on the instance as ``self.dropout_ratio``.
        init_std: Sigma of the normal initializer of the output layer.
        **kwargs: Accepted but not forwarded to the parent constructor.
    """
    super(Res3D, self).__init__()
    self.nclass = nclass
    self.num_segments = 8
    #self.feat_dim = 4096
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    # Stored on the instance; not read while the layers below are built.
    self.config_3d_layer = [2, 2, 2, 2]
    self.config_3d_temporal_stride = [1, 2, 2, 2]

    # (in_channel, out_channel, spatial_stride, temporal_stride), two per stage.
    block_plan = (
        (64, 64, 1, 1),  # conv2_x
        (64, 64, 1, 1),  # input size = 112*112
        (64, 128, 2, 2),  # conv3_x
        (128, 128, 1, 1),
        (128, 256, 2, 2),  # conv4_x
        (256, 256, 1, 1),
        (256, 512, 2, 2),  # conv5_x
        (512, 512, 1, 1),
    )

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')

        # conv1
        stem_init = init.Xavier(rnd_type='gaussian', factor_type='out', magnitude=2)
        self.features.add(
            nn.Conv3D(in_channels=input_channel,
                      channels=64,
                      kernel_size=(3, 7, 7),
                      strides=(1, 2, 2),
                      padding=(1, 3, 3),
                      weight_initializer=stem_init,
                      bias_initializer='zero'))

        # conv2_x .. conv5_x
        for in_c, out_c, s_stride, t_stride in block_plan:
            self.features.add(
                BasicBlock(in_channel=in_c,
                           out_channel=out_c,
                           spatial_stride=s_stride,
                           temporal_stride=t_stride))

        # avg pool
        self.features.add(nn.AvgPool3D(pool_size=(1, 7, 7)))

        self.output = nn.Dense(units=self.nclass,
                               in_units=512,
                               weight_initializer=init.Normal(sigma=self.init_std))
def __init__(self, kernel, n_layers, feature_size, device=None, last=True, connection='dense'):
    """Create the shared layers plus a kernel-dependent stack of convolutions.

    Args:
        kernel: Selects what is built. ``1`` appends Conv3D layers with a
            ``[3, k, k]`` kernel (``k`` is 1), ``32`` appends Conv3D layers
            with a ``[3, 3, 3]`` kernel, and ``'x'`` builds paired Conv2D
            encoder/decoder lists instead. (Original note: "kernel = 2D
            Kernel".)
        n_layers: Number of layers (or encoder/decoder pairs) to create.
        feature_size: Channel count; stored as ``self.c``.
        device: Accepted but not used by this constructor.
        last: Accepted but not used by this constructor.
        connection: Stored as ``self.connection``.
    """
    # kernel = 2D Kernel
    super(D2, self).__init__()
    self.n_layers = n_layers
    self.connection = connection
    # Conv3D layers created for the non-'x' kernels.
    self.d3 = []
    self.c = feature_size
    self.k = 1
    c = self.c
    k = self.k
    self.kd2 = kernel
    # NOTE(review): the source was flattened; everything below is assumed to
    # sit inside the name scope -- confirm.
    with self.name_scope():
        self.activation = nn.Activation('relu')
        self.tanh = nn.Activation('tanh')
        self.sigmoid = nn.Activation('sigmoid')
        self.relu = nn.Activation('relu')
        #self.norm = nn.BatchNorm(axis = 1)
        self.fc = nn.Dense(1, flatten=False)
        self.dropout = nn.Dropout(0.5)
        self.pool = nn.AvgPool3D([3, 1, 1], [2, 1, 1])
        if kernel != 'x':
            for n in range(self.n_layers):
                if kernel == 1:
                    # tk and tkp are computed but not used below.
                    tk = 1 + n * 2  # time kernel
                    tkp = n
                    self.d3.append(
                        nn.Conv3D(c, [3, k, k], [1, 1, 1], [1, 0, 0], dilation=[1, 1, 1]))
                elif kernel == 32:
                    self.d3.append(
                        nn.Conv3D(c, [3, 3, 3], [1, 1, 1], [1, 1, 1], dilation=[1, 1, 1]))
                # Layers kept in a plain list must be registered explicitly.
                # NOTE(review): placement reconstructed from flattened source;
                # as written, a kernel other than 1 or 32 appends nothing and
                # d3[-1] would raise IndexError -- confirm.
                self.register_child(self.d3[-1])
        elif kernel == 'x':
            self.encoder = []
            self.decoder = []
            self.enorm = []
            self.dnorm = []
            for l in range(n_layers):
                c2 = int(c / 2)
                # NOTE(review): c4 is also c / 2 despite its name -- confirm intent.
                c4 = int(c / 2)
                stride = 1
                stride_de = 1
                dilation = 2  #** (l)
                dilation_de = 2  #** (l + n_layers)
                # g is assigned but not used below.
                g = 1
                ks = 3
                self.encoder.append(
                    nn.Conv2D(c2, kernel_size=[1, ks], strides=[1, stride], padding=[0, dilation], dilation=[1, dilation]))
                # The last decoder layer uses c4 channels, the others c2.
                channel = c4 if l == n_layers - 1 else c2
                self.decoder.append(
                    nn.Conv2D(channel, kernel_size=[1, ks], strides=[1, stride_de], padding=[0, dilation_de], dilation=[1, dilation_de]))
                self.register_child(self.encoder[-1])
                self.register_child(self.decoder[-1])
def __init__(
        self,
        num_scenes,
        num_actions,
        model_depth,
        final_spatial_kernel=7,
        final_temporal_kernel=2,
        with_bias=False,
):
    """Create the shared trunk and the two task heads (scenes and actions).

    While the stages are built, ``self.comp_count`` is incremented once per
    ``R3DBlock`` and the return values of ``self.add_comp_count_index`` are
    collected in ``self.conv2_name`` .. ``self.conv5_name``.

    Args:
        num_scenes: Number of units of the scene output layer.
        num_actions: Number of units of the action output layer.
        model_depth: Key looked up in ``BLOCK_CONFIG`` for the stage depths.
        final_spatial_kernel: Height and width of the action pooling window.
        final_temporal_kernel: Temporal extent of the action pooling window.
        with_bias: Passed as ``use_bias`` to the stem convolutions and to
            every ``R3DBlock``.
    """
    super(R2Plus2D_MT, self).__init__()
    # Running index handed to each R3DBlock as comp_index.
    self.comp_count = 0
    # Stem: 1x7x7 convolution followed by 3x1x1 convolution, each with BN + ReLU.
    self.base = nn.Sequential(prefix='base_')
    with self.base.name_scope():
        self.base.add(
            nn.Conv3D(channels=45, kernel_size=(1, 7, 7), strides=(1, 2, 2), padding=(0, 3, 3), use_bias=with_bias),
            nn.BatchNorm(),
            nn.Activation(activation='relu'),
            nn.Conv3D(channels=64, kernel_size=(3, 1, 1), strides=(1, 1, 1), padding=(1, 0, 0), use_bias=with_bias),
            nn.BatchNorm(),
            nn.Activation(activation='relu'))
    # presumably a list of parameter names for the stem -- helper defined elsewhere; verify
    self.base_name = self.set_base_name()
    (n2, n3, n4, n5) = BLOCK_CONFIG[model_depth]

    # conv2: n2 blocks, 64 -> 64, no downsampling.
    self.conv2_name = []
    self.conv2 = nn.Sequential(prefix='conv2_')
    with self.conv2.name_scope():
        for _ in range(n2):
            # Only this stage passes `prefix` to add_comp_count_index.
            self.conv2_name.extend(
                self.add_comp_count_index(change_channels=False, comp_index=self.comp_count, prefix=self.conv2.prefix))
            self.conv2.add(
                R3DBlock(input_filter=64, num_filter=64, comp_index=self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # conv3: one downsampling block 64 -> 128, then n3 - 1 blocks at 128.
    self.conv3_name = []
    self.conv3 = nn.Sequential(prefix='conv3_')
    with self.conv3.name_scope():
        # Debug output emitted on every construction.
        print("this in conv3 comp_count is ", self.comp_count)
        self.conv3_name.extend(
            self.add_comp_count_index(change_channels=True, downsampling=True, comp_index=self.comp_count))
        self.conv3.add(
            R3DBlock(input_filter=64, num_filter=128, comp_index=self.comp_count, downsampling=True, use_bias=with_bias))
        self.comp_count += 1
        for _ in range(n3 - 1):
            self.conv3_name.extend(
                self.add_comp_count_index(change_channels=False, downsampling=False, comp_index=self.comp_count))
            self.conv3.add(
                R3DBlock(input_filter=128, num_filter=128, comp_index=self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # conv4: one downsampling block 128 -> 256, then n4 - 1 blocks at 256.
    self.conv4_name = []
    self.conv4 = nn.Sequential(prefix='conv4_')
    with self.conv4.name_scope():
        self.conv4_name.extend(
            self.add_comp_count_index(change_channels=True, downsampling=True, comp_index=self.comp_count))
        self.conv4.add(
            R3DBlock(128, 256, comp_index=self.comp_count, downsampling=True, use_bias=with_bias))
        self.comp_count += 1
        for _ in range(n4 - 1):
            self.conv4_name.extend(
                self.add_comp_count_index(change_channels=False, downsampling=False, comp_index=self.comp_count))
            self.conv4.add(
                R3DBlock(256, 256, comp_index=self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # conv5: one downsampling block 256 -> 512, then n5 - 1 blocks at 512.
    self.conv5_name = []
    self.conv5 = nn.Sequential(prefix='conv5_')
    with self.conv5.name_scope():
        self.conv5_name.extend(
            self.add_comp_count_index(change_channels=True, downsampling=True, comp_index=self.comp_count))
        self.conv5.add(
            R3DBlock(256, 512, comp_index=self.comp_count, downsampling=True, use_bias=with_bias))
        self.comp_count += 1
        for _ in range(n5 - 1):
            # Tail blocks here rely on add_comp_count_index's defaults for
            # change_channels / downsampling.
            self.conv5_name.extend(
                self.add_comp_count_index(comp_index=self.comp_count))
            self.conv5.add(
                R3DBlock(512, 512, self.comp_count, use_bias=with_bias))
            self.comp_count += 1

    # final output of conv5 is [512,t/8,7,7]  (original author's note)
    # 512x1x7x7
    # NOTE(review): the source was flattened; both heads below are assumed to
    # sit outside the conv5 name scope -- confirm.
    # for static scene tagging
    self.scene_conv = nn.Sequential()
    self.scene_conv.add(
        nn.Conv3D(256, kernel_size=(1, 3, 3), strides=(1, 2, 2)),
        nn.BatchNorm(),
        nn.Activation('relu'))
    # shape 256*1*2*2  (original author's note)
    # reshape(1024)
    self.scene_drop = nn.Dropout(rate=0.3)
    self.scene_output = nn.Dense(num_scenes)

    # for action classification
    self.action_conv = nn.Sequential()
    self.action_conv.add(
        nn.Conv3D(512, kernel_size=(1, 3, 3), strides=(1, 1, 1), padding=(0, 1, 1)),
        nn.BatchNorm(),
        nn.Activation('relu'))
    self.action_avg = nn.AvgPool3D(pool_size=(final_temporal_kernel, final_spatial_kernel, final_spatial_kernel),
                                   strides=(1, 1, 1),
                                   padding=(0, 0, 0))
    self.action_output = nn.Dense(units=num_actions)
    self.dense0_name = ['final_fc_weight', 'final_fc_bias']