Example #1
0
def get_R2plus1d(num_class=101,
                 no_bias=0,
                 model_depth=18,
                 final_spatial_kernel=7,
                 final_temporal_kernel=4):
    """Assemble an R(2+1)D-style network as a flat ``nn.Sequential``.

    The network consists of a factorised stem (a 1x7x7 convolution followed
    by a 3x1x1 convolution, each with batch norm and ReLU), four stages of
    ``R3DBlock`` units (conv_2x .. conv_5x), an average pool and a dense
    classifier.

    Every ``R3DBlock`` is given its own consecutive ``comp_index`` (0, 1,
    2, ...) in the order the blocks are added, so no two blocks share an
    index and no block falls back to the constructor default.

    Parameters
    ----------
    num_class : int
        Number of units of the final dense layer.
    no_bias : int
        Accepted for interface compatibility; this builder does not read it.
    model_depth : hashable
        Key into ``BLOCK_CONFIG``; the entry must unpack into the four
        per-stage block counts.
    final_spatial_kernel : int
        Height and width of the final average-pooling window.
    final_temporal_kernel : int
        Temporal extent of the final average-pooling window.

    Returns
    -------
    nn.Sequential
        The assembled network; no parameter initialisation is done here.
    """
    net = nn.Sequential()

    # Stem: spatial convolution, then temporal convolution.
    net.add(
        nn.Conv3D(channels=45,
                  kernel_size=(1, 7, 7),
                  strides=(1, 2, 2),
                  padding=(0, 3, 3)),
        nn.BatchNorm(),
        nn.Activation(activation='relu'),
        nn.Conv3D(channels=64,
                  kernel_size=(3, 1, 1),
                  strides=(1, 1, 1),
                  padding=(1, 0, 0)),
        nn.BatchNorm(),
        nn.Activation(activation='relu'))

    (n1, n2, n3, n4) = BLOCK_CONFIG[model_depth]
    comp_count = 0

    # conv_2x: n1 blocks at constant width, no downsampling.
    for _ in range(n1):
        net.add(
            R3DBlock(input_filter=64, num_filter=64, comp_index=comp_count))
        comp_count += 1

    # conv_3x .. conv_5x: one downsampling block that changes the width,
    # followed by (count - 1) blocks at the new width.
    for in_filter, out_filter, count in ((64, 128, n2),
                                         (128, 256, n3),
                                         (256, 512, n4)):
        net.add(
            R3DBlock(input_filter=in_filter,
                     num_filter=out_filter,
                     comp_index=comp_count,
                     downsampling=True))
        comp_count += 1
        for _ in range(count - 1):
            net.add(
                R3DBlock(input_filter=out_filter,
                         num_filter=out_filter,
                         comp_index=comp_count))
            comp_count += 1

    # Final layers: average pool over the remaining volume, then classify.
    net.add(
        nn.AvgPool3D(pool_size=(final_temporal_kernel, final_spatial_kernel,
                                final_spatial_kernel),
                     strides=(1, 1, 1),
                     padding=(0, 0, 0)))
    net.add(nn.Dense(units=num_class))
    return net
Example #2
0
def test_pool():
    """Smoke-test the Gluon pooling layers in 1D, 2D and 3D.

    Each layer is passed to ``check_layer_forward`` together with an input
    shape of matching rank. Afterwards the output shape of ``MaxPool2D`` is
    asserted for both values of ``ceil_mode``.
    """

    def _forward_all(pool_layers, input_shape):
        # Run the shared forward check on every layer of one rank.
        for pool_layer in pool_layers:
            check_layer_forward(pool_layer, input_shape)

    _forward_all(
        [
            nn.MaxPool1D(),
            nn.MaxPool1D(3),
            nn.MaxPool1D(3, 2),
            nn.AvgPool1D(),
            nn.AvgPool1D(count_include_pad=False),
            nn.GlobalAvgPool1D(),
        ],
        (1, 2, 10))

    _forward_all(
        [
            nn.MaxPool2D(),
            nn.MaxPool2D((3, 3)),
            nn.MaxPool2D(3, 2),
            nn.AvgPool2D(),
            nn.AvgPool2D(count_include_pad=False),
            nn.GlobalAvgPool2D(),
        ],
        (1, 2, 10, 10))

    _forward_all(
        [
            nn.MaxPool3D(),
            nn.MaxPool3D((3, 3, 3)),
            nn.MaxPool3D(3, 2),
            nn.AvgPool3D(),
            nn.AvgPool3D(count_include_pad=False),
            nn.GlobalAvgPool3D(),
        ],
        (1, 2, 10, 10, 10))

    # ceil_mode: a 10x10 input pooled with window 3 gives 3x3 when the output
    # size is floored and 4x4 when it is rounded up.
    data = mx.nd.zeros((2, 2, 10, 10))
    for use_ceil, expected_shape in ((False, (2, 2, 3, 3)),
                                     (True, (2, 2, 4, 4))):
        pool = nn.MaxPool2D(3, ceil_mode=use_ceil)
        pool.collect_params().initialize()
        assert pool(data).shape == expected_shape
Example #3
0
    def __init__(self,
                 nclass,
                 input_channel=3,
                 batch_normal=True,
                 dropout_ratio=0.8,
                 init_std=0.001,
                 **kwargs):
        """Create the layers of the P3D network.

        Parameters
        ----------
        nclass : int
            Number of units of the output dense layer.
        input_channel : int
            ``in_channels`` of the first convolution.
        batch_normal : bool
            Accepted but not read by this constructor.
        dropout_ratio : float
            Stored on the instance; no dropout layer is created here.
        init_std : float
            Sigma of the normal initializer of the output layer weights.
        **kwargs
            Accepted but not forwarded to the parent constructor.
        """
        super(P3D, self).__init__()
        self.nclass = nclass
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.expansion = 1

        # Stage widths, scaled by the expansion factor.
        c64, c128, c256, c512 = (base * self.expansion
                                 for base in (64, 128, 256, 512))

        def _stage(*block_specs):
            # One HybridSequential holding a P3D_block per positional spec.
            stage = nn.HybridSequential()
            for spec in block_specs:
                stage.add(P3D_block(*spec))
            return stage

        with self.name_scope():
            # Stem: spatial-only convolution, batch norm, ReLU, max pool.
            self.conv1 = nn.Conv3D(in_channels=input_channel,
                                   channels=64,
                                   kernel_size=(1, 7, 7),
                                   strides=(1, 2, 2),
                                   padding=(0, 3, 3),
                                   use_bias=False)
            self.bn1 = nn.BatchNorm(in_channels=64)
            self.relu = nn.Activation('relu')
            self.maxpool = nn.MaxPool3D(pool_size=(1, 3, 3),
                                        strides=(1, 2, 2),
                                        padding=(0, 1, 1))

            # The block letters keep cycling A, B, C across all four stages.
            self.conv2 = _stage(('A', 64, c64, 2),
                                ('B', c64, c64),
                                ('C', c64, c64))
            self.conv3 = _stage(('A', c64, c128, 2),
                                ('B', c128, c128),
                                ('C', c128, c128),
                                ('A', c128, c128))
            self.conv4 = _stage(('B', c128, c256, 2),
                                ('C', c256, c256),
                                ('A', c256, c256),
                                ('B', c256, c256),
                                ('C', c256, c256),
                                ('A', c256, c256))
            # The last block's output width is a literal 512 (not scaled).
            self.conv5 = _stage(('B', c256, c512, 2),
                                ('C', c512, c512),
                                ('A', c512, 512))

            self.avg_pool = nn.AvgPool3D(pool_size=(1, 3, 3))
            self.output = nn.Dense(
                in_units=512,
                units=nclass,
                weight_initializer=init.Normal(sigma=init_std))
Example #4
0
def _make_branch(use_pool, norm_layer, norm_kwargs, *conv_settings):
    """Build one branch: an optional 3D pool followed by basic conv units.

    Parameters
    ----------
    use_pool : str or None
        ``'avg'`` or ``'max'`` prepends a 3x3x3, stride-1, pad-1 pooling layer
        of that type; any other value adds no pooling.
    norm_layer, norm_kwargs
        Passed through unchanged to ``_make_basic_conv``.
    *conv_settings
        One sequence per convolution, positionally ordered as
        ``(in_channels, channels, kernel_size, strides, padding)``. ``None``
        entries are skipped.

    Returns
    -------
    nn.HybridSequential
        The assembled branch.

    Notes
    -----
    ``in_channels`` and ``channels`` are ordinary locals that persist across
    settings: a ``None`` in either position reuses the value of the previous
    setting, and raises ``UnboundLocalError`` if no earlier setting supplied
    one.
    """
    branch = nn.HybridSequential(prefix='')

    if use_pool == 'avg':
        pool_cls = nn.AvgPool3D
    elif use_pool == 'max':
        pool_cls = nn.MaxPool3D
    else:
        pool_cls = None
    if pool_cls is not None:
        branch.add(pool_cls(pool_size=3, strides=1, padding=1))

    field_names = ('in_channels', 'channels', 'kernel_size', 'strides',
                   'padding')
    for setting in conv_settings:
        conv_kwargs = {}
        for position, value in enumerate(setting):
            if value is None:
                continue
            field = field_names[position]
            if field == 'in_channels':
                in_channels = value
            elif field == 'channels':
                channels = value
            else:
                conv_kwargs[field] = value
        branch.add(
            _make_basic_conv(in_channels, channels, norm_layer, norm_kwargs,
                             **conv_kwargs))
    return branch
Example #5
0
    def __init__(self, feat_size, input_channel=1, power=1, **kwargs):
        """Create a stack of eight Conv3D/BatchNorm/ReLU units plus a head.

        Parameters
        ----------
        feat_size : int
            Number of units of the final dense layer.
        input_channel : int
            Accepted but not read by this constructor.
        power : int-like
            Converted with ``int`` and used as a multiplier for the channel
            count of every convolution.
        **kwargs
            Forwarded to the parent constructor.
        """
        super(Conv3DNet, self).__init__(**kwargs)

        width_multiplier = int(power)
        # One tuple per convolution: (channels, kernel_size, strides, padding).
        self.parameters = [(8, 5, 1, 2), (16, 3, 2, 1), (16, 3, 1, 1),
                           (32, 3, 2, 1), (32, 3, 1, 1), (64, 3, 2, 1),
                           (64, 3, 1, 1), (64, 3, 1, 1)]

        self.block = nn.Sequential()
        for base_channels, kernel, stride, pad in self.parameters:
            conv = nn.Conv3D(channels=base_channels * width_multiplier,
                             kernel_size=kernel,
                             strides=stride,
                             padding=pad)
            self.block.add(conv, nn.BatchNorm(), nn.Activation('relu'))

        self.avgpool = nn.AvgPool3D(pool_size=(4, 4, 4))

        self.fc = nn.Dense(feat_size)
Example #6
0
 def __init__(self, layers, feature, arch='', auto=False, norm=False,
              device=None, last=True, flatten=False, reconstruct=False):
     """Store the configuration and create the shared helper layers.

     ``layers``, ``arch``, ``norm`` and ``reconstruct`` are stored on the
     instance. ``feature``, ``auto``, ``device``, ``last`` and ``flatten``
     are accepted but not read by this constructor.
     """
     super(D2, self).__init__()
     self.arch = arch
     self.reconstruct = reconstruct
     self.layers = layers
     self.norm = norm
     with self.name_scope():
         # Activations; `activation` and `relu` are two separate ReLU layers.
         self.activation = nn.Activation('relu')
         self.tanh = nn.Activation('tanh')
         self.sigmoid = nn.Activation('sigmoid')
         self.relu = nn.Activation('relu')
         # Single-unit dense head applied without flattening the input.
         self.fc = nn.Dense(1, flatten=False)
         self.dropout = nn.Dropout(0.5)
         # Pools the first spatial axis only: window 3, stride 2.
         self.pool = nn.AvgPool3D([3, 1, 1], [2, 1, 1])
         # Containers, emptied here before `self.add` is called.
         self.encoder = []
         self.decoder = []
         self.enorm = []
         self.dnorm = []
         self.rnorm = []
         # `add` is defined elsewhere in the class; presumably it fills the
         # containers above for these three sizes -- TODO confirm.
         self.add([8, 16, 32])
def _make_3D(prefix):
    """Build the 3D branch (branch 2) as a ``MyHybridSequential``.

    The branch starts with a two-convolution 2D sub-branch, reshapes and
    transposes its output into a 5D tensor, applies a 3x3x3 convolution, then
    alternates ``BlockV1``/``BlockV2`` units with batch norm + ReLU, and ends
    with average pooling, a reshape to ``(-1, 512)`` and dropout.

    Parameters
    ----------
    prefix : str
        Prefix passed to ``MyHybridSequential``.

    Returns
    -------
    MyHybridSequential
        The assembled branch.
    """
    seq = MyHybridSequential(prefix=prefix)
    with seq.name_scope():
        # The first two layers match the first two layers of the
        # three-convolution branch.
        seq.add(_make_branch(None, (64, 1, None, None), (96, 3, None, 1)))
        seq.add(MyReshape(shape=(-1, 16, 96, 28, 28)))
        seq.add(MyTranspose(axes=(0, 2, 1, 3, 4)))
        seq.add(
            nn.Conv3D(channels=128,
                      kernel_size=(3, 3, 3),
                      strides=(1, 1, 1),
                      padding=(1, 1, 1)))

        # Block1
        seq.add(BlockV1(128))

        # Block2/Block3 at width 256, then Block4/Block5 at width 512; each
        # pair is preceded by batch norm + ReLU.
        for width in (256, 512):
            seq.add(nn.BatchNorm(epsilon=0.0001))
            seq.add(nn.Activation('relu'))
            seq.add(BlockV2(width))
            seq.add(BlockV1(width))

        seq.add(nn.BatchNorm(epsilon=0.0001))
        seq.add(nn.Activation('relu'))
        seq.add(nn.AvgPool3D(pool_size=(4, 7, 7), strides=(1, 1, 1)))
        seq.add(MyReshape(shape=(-1, 512)))
        seq.add(nn.Dropout(0.5))

    return seq
Example #8
0
    def __init__(self, nclass, input_channel=3, batch_normal=True,
                 dropout_ratio=0.8, init_std=0.001, **kwargs):
        """Create the layers of the 34-layer Res21D network.

        Parameters
        ----------
        nclass : int
            Number of units of the output dense layer.
        input_channel : int
            ``in_channels`` of the first convolution.
        batch_normal : bool
            Accepted but not read by this constructor.
        dropout_ratio : float
            Stored on the instance; no dropout layer is created here.
        init_std : float
            Sigma of the normal initializer of the output layer weights.
        **kwargs
            Accepted but not forwarded to the parent constructor.
        """
        super(Res21D_34, self).__init__()
        self.nclass = nclass
        self.new_length = 8
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        with self.name_scope():
            self.conv1 = nn.Conv3D(in_channels=input_channel,
                                   channels=64,
                                   kernel_size=(3, 7, 7),
                                   strides=(1, 2, 2),
                                   padding=(1, 3, 3),
                                   weight_initializer=init.Xavier(),
                                   bias_initializer='zero')

            # conv2: 3 blocks at width 64; the first strides spatially only.
            self.conv2 = nn.HybridSequential()
            self.conv2.add(
                Res21D_Block(in_channel=64, out_channel=64, spatial_stride=2))
            for _ in range(2):
                self.conv2.add(Res21D_Block(64, 64))

            # conv3: 4 blocks at width 128; the first strides in space and time.
            self.conv3 = nn.HybridSequential()
            self.conv3.add(
                Res21D_Block(in_channel=64,
                             out_channel=128,
                             spatial_stride=2,
                             temporal_stride=2))
            for _ in range(3):
                self.conv3.add(Res21D_Block(128, 128))

            # conv4: 6 blocks at width 256.
            self.conv4 = nn.HybridSequential()
            self.conv4.add(
                Res21D_Block(in_channel=128,
                             out_channel=256,
                             spatial_stride=2,
                             temporal_stride=2))
            for _ in range(5):
                self.conv4.add(Res21D_Block(256, 256))

            # conv5: 3 blocks at width 512.
            self.conv5 = nn.HybridSequential()
            self.conv5.add(
                Res21D_Block(in_channel=256,
                             out_channel=512,
                             spatial_stride=2,
                             temporal_stride=2))
            for _ in range(2):
                self.conv5.add(Res21D_Block(512, 512))

            self.avg_pool = nn.AvgPool3D(pool_size=(1, 4, 4))
            self.output = nn.Dense(
                in_units=512,
                units=nclass,
                weight_initializer=init.Normal(sigma=init_std))
Example #9
0
    def __init__(self,
                 num_class,
                 model_depth,
                 final_spatial_kernel=7,
                 final_temporal_kernel=4,
                 with_bias=False):
        """Create the layers of the R2Plus2D network.

        The network is a factorised stem (``self.base``), four stages of
        ``R3DBlock`` units (``self.conv2`` .. ``self.conv5``), an average pool
        and a dense output layer. Alongside each stage a ``*_name`` list is
        filled from ``self.add_comp_count_index``; presumably these are the
        parameter names of an external checkpoint -- TODO confirm.

        ``self.comp_count`` is incremented once per ``R3DBlock`` so that every
        block receives a distinct, consecutive ``comp_index``.

        Parameters
        ----------
        num_class : int
            Number of units of the output dense layer.
        model_depth : hashable
            Key into ``BLOCK_CONFIG``; the entry must unpack into the four
            per-stage block counts.
        final_spatial_kernel : int
            Height and width of the final average-pooling window.
        final_temporal_kernel : int
            Temporal extent of the final average-pooling window.
        with_bias : bool
            Passed as ``use_bias`` to the stem convolutions and every
            ``R3DBlock``.
        """
        super(R2Plus2D, self).__init__()
        # Running index handed to each R3DBlock as `comp_index`.
        self.comp_count = 0
        # Stem: spatial 1x7x7 convolution followed by temporal 3x1x1
        # convolution, each with batch norm and ReLU.
        self.base = nn.HybridSequential(prefix='base_')
        with self.base.name_scope():
            self.base.add(
                nn.Conv3D(channels=45,
                          kernel_size=(1, 7, 7),
                          strides=(1, 2, 2),
                          padding=(0, 3, 3),
                          use_bias=with_bias), nn.BatchNorm(),
                nn.Activation(activation='relu'),
                nn.Conv3D(channels=64,
                          kernel_size=(3, 1, 1),
                          strides=(1, 1, 1),
                          padding=(1, 0, 0),
                          use_bias=with_bias), nn.BatchNorm(),
                nn.Activation(activation='relu'))

        self.base_name = self.set_base_name()
        # Number of blocks in conv2 .. conv5 for the requested depth.
        (n2, n3, n4, n5) = BLOCK_CONFIG[model_depth]

        # conv2: n2 blocks at width 64, no downsampling. This is the only
        # stage that passes `prefix` to add_comp_count_index.
        self.conv2_name = []
        self.conv2 = nn.HybridSequential(prefix='conv2_')
        with self.conv2.name_scope():
            for _ in range(n2):
                self.conv2_name.extend(
                    self.add_comp_count_index(change_channels=False,
                                              comp_index=self.comp_count,
                                              prefix=self.conv2.prefix))
                self.conv2.add(
                    R3DBlock(input_filter=64,
                             num_filter=64,
                             comp_index=self.comp_count,
                             use_bias=with_bias))
                self.comp_count += 1

        # conv3: one downsampling block 64 -> 128, then n3 - 1 blocks at 128.
        self.conv3_name = []
        self.conv3 = nn.HybridSequential(prefix='conv3_')
        with self.conv3.name_scope():
            # Debug output: prints the running block index to stdout.
            print("this in conv3 comp_count is ", self.comp_count)
            self.conv3_name.extend(
                self.add_comp_count_index(change_channels=True,
                                          downsampling=True,
                                          comp_index=self.comp_count))
            self.conv3.add(
                R3DBlock(input_filter=64,
                         num_filter=128,
                         comp_index=self.comp_count,
                         downsampling=True,
                         use_bias=with_bias))

            self.comp_count += 1
            for _ in range(n3 - 1):
                self.conv3_name.extend(
                    self.add_comp_count_index(change_channels=False,
                                              downsampling=False,
                                              comp_index=self.comp_count))
                self.conv3.add(
                    R3DBlock(input_filter=128,
                             num_filter=128,
                             comp_index=self.comp_count,
                             use_bias=with_bias))
                self.comp_count += 1

        # conv4: one downsampling block 128 -> 256, then n4 - 1 blocks at 256.
        self.conv4_name = []
        self.conv4 = nn.HybridSequential(prefix='conv4_')
        with self.conv4.name_scope():
            self.conv4_name.extend(
                self.add_comp_count_index(change_channels=True,
                                          downsampling=True,
                                          comp_index=self.comp_count))
            self.conv4.add(
                R3DBlock(128,
                         256,
                         comp_index=self.comp_count,
                         downsampling=True,
                         use_bias=with_bias))
            self.comp_count += 1

            for _ in range(n4 - 1):
                self.conv4_name.extend(
                    self.add_comp_count_index(change_channels=False,
                                              downsampling=False,
                                              comp_index=self.comp_count))
                self.conv4.add(
                    R3DBlock(256,
                             256,
                             comp_index=self.comp_count,
                             use_bias=with_bias))
                self.comp_count += 1

        # conv5: one downsampling block 256 -> 512, then n5 - 1 blocks at 512.
        self.conv5_name = []
        self.conv5 = nn.HybridSequential(prefix='conv5_')
        with self.conv5.name_scope():
            self.conv5_name.extend(
                self.add_comp_count_index(change_channels=True,
                                          downsampling=True,
                                          comp_index=self.comp_count))
            self.conv5.add(
                R3DBlock(256,
                         512,
                         comp_index=self.comp_count,
                         downsampling=True,
                         use_bias=with_bias))
            self.comp_count += 1
            for _ in range(n5 - 1):
                # Relies on the defaults of add_comp_count_index for
                # change_channels and downsampling.
                self.conv5_name.extend(
                    self.add_comp_count_index(comp_index=self.comp_count))
                self.conv5.add(
                    R3DBlock(512, 512, self.comp_count, use_bias=with_bias))
                self.comp_count += 1

        # final output of conv5 is [512,t/8,7,7]
        self.avg = nn.AvgPool3D(pool_size=(final_temporal_kernel,
                                           final_spatial_kernel,
                                           final_spatial_kernel),
                                strides=(1, 1, 1),
                                padding=(0, 0, 0))
        # The output layer applies a sigmoid activation to its result.
        self.output = nn.Dense(units=num_class,
                               activation='sigmoid',
                               use_bias=True)
        self.dense0_name = ['final_fc_weight', 'final_fc_bias']
Example #10
0
    def __init__(self, classes=4, dropout_keep_prob=0.5, **kwargs):
        """Create the layers of the Inception-I3D network.

        Parameters
        ----------
        classes : int
            Number of output classes (default 4). Used as the channel count
            of the final 1x1x1 unit and as the number of dense output units.
        dropout_keep_prob : float
            Probability of *keeping* a unit. Gluon's ``nn.Dropout`` expects
            the fraction of units to *drop*, so the layer is created with
            ``1 - dropout_keep_prob``.
        **kwargs
            Forwarded to the parent constructor.

        Notes
        -----
        Gluon uses the NCDHW layout, whereas the TensorFlow reference takes
        ``batch_size x num_frames x 224 x 224 x num_channels``. The shape
        comments below are the original author's notes for an input of shape
        (1, 3, 79, 224, 224) and have not been re-verified.
        """
        super(InceptionI3d, self).__init__(**kwargs)
        self._num_classes = classes
        self.dropout_keep_prob = dropout_keep_prob

        # this is the main classifier
        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')

            self.features.add(
                _make_unit3d(channels=64,
                             kernel_size=(7, 7, 7),
                             strides=(2, 2, 2)))
            # shape is (1, 64, 37, 109, 109)

            # The reference uses 'same' padding here; the padding values of
            # the pooling layers in this constructor are hard-coded instead.
            self.features.add(
                nn.MaxPool3D(pool_size=(1, 3, 3),
                             strides=(1, 2, 2),
                             padding=(0, 55, 55)))
            # shape is (1, 64, 37, 109, 109)

            self.features.add(_make_unit3d(channels=64, kernel_size=(1, 1, 1)))
            # shape (1, 64, 37, 109, 109)

            self.features.add(_make_unit3d(channels=192,
                                           kernel_size=(3, 3, 3)))
            # shape (1, 192, 35, 107, 107)

            self.features.add(
                nn.MaxPool3D(pool_size=(1, 3, 3),
                             strides=(1, 2, 2),
                             padding=(0, 54, 54)))  # padding same
            # shape (1, 192, 35, 107, 107)

            self.features.add(_make_mixed_3b('mixed_3b'))
            self.features.add(_make_mixed_3c('mixed_3c'))
            # (1, 480, 35, 107, 107)

            self.features.add(
                nn.MaxPool3D(pool_size=(3, 3, 3),
                             strides=(2, 2, 2),
                             padding=(18, 54, 54)))  # padding is same here

            self.features.add(_make_mixed_4b('mixed_4b'))
            self.features.add(_make_mixed_4c('mixed_4c'))
            self.features.add(_make_mixed_4d('mixed_4d'))
            self.features.add(_make_mixed_4e('mixed_4e'))
            self.features.add(_make_mixed_4f('mixed_4f'))
            # (1, 384, 35, 107, 107)

            self.features.add(
                nn.MaxPool3D(pool_size=(2, 2, 2),
                             strides=(2, 2, 2),
                             padding=(18, 54, 54)))

            self.features.add(_make_mixed_5b('mixed_5b'))
            self.features.add(_make_mixed_5c('mixed_5c'))

            self.features.add(nn.AvgPool3D(pool_size=(2, 7, 7)))

            # nn.Dropout takes the drop rate, i.e. the complement of the
            # keep probability this constructor is configured with.
            self.features.add(nn.Dropout(1.0 - self.dropout_keep_prob))

            self.features.add(
                _make_unit3d(channels=self._num_classes,
                             kernel_size=(1, 1, 1)))

            # logits/main classifier outputs endpoint
            self.output = nn.HybridSequential(prefix='')
            self.output.add(nn.Flatten())
            self.output.add(nn.Dense(self._num_classes))
Example #11
0
    def __init__(self,
                 nclass,
                 base_model='resnet18_v1b',
                 pretrained_base=True,
                 num_segments=8,
                 num_temporal=1,
                 ifTSN=True,
                 input_channel=3,
                 batch_normal=True,
                 dropout_ratio=0.8,
                 init_std=0.001,
                 **kwargs):
        """Create the layers of the ECO network.

        The 2D trunk is taken from the model returned by ``get_model``; a
        stack of 3D ``BasicBlock`` units and a dense head are built on top.

        Parameters
        ----------
        nclass : int
            Number of units of the final dense layer.
        base_model : str
            Model name passed to ``get_model``, e.g. ``'resnet18_v1b'``,
            ``'resnet18_v2'``, ``'resnet18_v1b_kinetics400'`` or
            ``'resnet18_v1b_k400_ucf101'``.
        pretrained_base : bool
            Passed as ``pretrained`` to ``get_model``. When true, the 3D
            blocks and the output head are also initialised here with
            ``init.MSRAPrelu()``.
        num_segments : int
            Stored on the instance; it also selects the temporal stride and,
            for the flatten head, the input width of the first dense layer.
        num_temporal : int
            Temporal stride of the first 3D block. Only honoured when
            ``num_segments`` is 8, 16 or 32; otherwise 1 is used.
        ifTSN : bool
            ``True`` builds a single dense layer on 512 features; otherwise a
            two-layer head on the flattened 3D features is built.
        input_channel : int
            Accepted but not read by this constructor.
        batch_normal : bool
            Accepted but not read by this constructor.
        dropout_ratio : float
            Rate of the dropout layer.
        init_std : float
            Sigma of the normal initializer of the dense layer weights.
        **kwargs
            Accepted but not forwarded to the parent constructor.
        """
        super(ECO, self).__init__()
        self.nclass = nclass
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.num_segments = num_segments
        self.ifTSN = ifTSN
        self.input_shape = 224
        self.base_model = base_model

        # self.expansion is 4 for resnet50/101/152 and 1 for everything else.
        # The first matching entry wins, in this order.
        self.expansion = 1
        for model_key, expansion in (('resnet18_v1b', 1),
                                     ('resnet34_v1b', 1),
                                     ('resnet50_v1b', 4),
                                     ('resnet101_v1b', 4),
                                     ('resnet152_v1b', 4)):
            if model_key in self.base_model:
                self.expansion = expansion
                break

        # Channel dimension coming out of the 2D convolutions.
        self.feat_dim_2d = 128 * self.expansion

        # num_temporal defaults to 1: in the paper the temporal dimension is
        # not reduced at the start.
        self.num_temporal = (num_temporal
                             if self.num_segments in (8, 16, 32) else 1)

        # Input dimension of the fc layer.
        if self.ifTSN == True:
            self.feat_dim_3d = 512
        else:  # Flatten
            tmppara = self.num_segments // 4
            tmppara = tmppara // (self.num_temporal if tmppara > 1 else 1)
            self.feat_dim_3d = 512 * tmppara

        pretrained_model = get_model(self.base_model,
                                     pretrained=pretrained_base)

        with self.name_scope():
            # 2D feature trunk, reused from the base model.
            if self.base_model == 'resnet18_v2':
                self.feature2d = pretrained_model.features
            else:
                self.conv1 = pretrained_model.conv1
                self.bn1 = pretrained_model.bn1
                self.relu = pretrained_model.relu
                self.maxpool = pretrained_model.maxpool
                self.layer1 = pretrained_model.layer1
                self.layer2 = pretrained_model.layer2

            # 3D feature stack: two blocks each for conv3_x, conv4_x, conv5_x.
            # Each row is (in_channel, out_channel, spatial_stride,
            # temporal_stride).
            self.features_3d = nn.HybridSequential(prefix='')
            for in_ch, out_ch, s_stride, t_stride in (
                    (self.feat_dim_2d, 128, 1, self.num_temporal),
                    (128, 128, 1, 1),
                    (128, 256, 2, 2),
                    (256, 256, 1, 1),
                    (256, 512, 2, 2),
                    (512, 512, 1, 1)):
                self.features_3d.add(
                    BasicBlock(in_channel=in_ch,
                               out_channel=out_ch,
                               spatial_stride=s_stride,
                               temporal_stride=t_stride))
            self.features_3d.add(nn.AvgPool3D(pool_size=(1, 7, 7)))

            self.dropout = nn.Dropout(rate=self.dropout_ratio)

            self.output = nn.HybridSequential(prefix='')
            if self.ifTSN == True:
                self.output.add(
                    nn.Dense(
                        units=self.nclass,
                        in_units=512,
                        weight_initializer=init.Normal(sigma=self.init_std)))
            else:
                self.output.add(
                    nn.Dense(
                        units=512,
                        in_units=self.feat_dim_3d,
                        weight_initializer=init.Normal(sigma=self.init_std)),
                    nn.Dense(
                        units=self.nclass,
                        in_units=512,
                        weight_initializer=init.Normal(sigma=self.init_std)))

            # With a pretrained trunk only the newly created parts are
            # initialised here.
            if pretrained_base:
                self.features_3d.initialize(init.MSRAPrelu())
                self.output.initialize(init.MSRAPrelu())
Example #12
0
    def __init__(self,
                 nclass,
                 input_channel=3,
                 dropout_ratio=0.5,
                 init_std=0.001,
                 **kwargs):
        """Create the layers of the Res3D network.

        Parameters
        ----------
        nclass : int
            Number of units of the output dense layer.
        input_channel : int
            ``in_channels`` of the first convolution.
        dropout_ratio : float
            Stored on the instance; no dropout layer is created here.
        init_std : float
            Sigma of the normal initializer of the output layer weights.
        **kwargs
            Accepted but not forwarded to the parent constructor.
        """
        super(Res3D, self).__init__()
        self.nclass = nclass
        self.num_segments = 8
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        # Stored for reference only; the block table below is written out
        # explicitly and does not read these two lists.
        self.config_3d_layer = [2, 2, 2, 2]
        self.config_3d_temporal_stride = [1, 2, 2, 2]

        # (in_channel, out_channel, spatial_stride, temporal_stride)
        block_table = (
            (64, 64, 1, 1),    # conv2_x
            (64, 64, 1, 1),
            (64, 128, 2, 2),   # conv3_x
            (128, 128, 1, 1),
            (128, 256, 2, 2),  # conv4_x
            (256, 256, 1, 1),
            (256, 512, 2, 2),  # conv5_x
            (512, 512, 1, 1),
        )

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')

            # conv1
            xavier = init.Xavier(rnd_type='gaussian',
                                 factor_type='out',
                                 magnitude=2)
            self.features.add(
                nn.Conv3D(in_channels=input_channel,
                          channels=64,
                          kernel_size=(3, 7, 7),
                          strides=(1, 2, 2),
                          padding=(1, 3, 3),
                          weight_initializer=xavier,
                          bias_initializer='zero'))

            # conv2_x .. conv5_x, two residual blocks per stage
            for in_ch, out_ch, s_stride, t_stride in block_table:
                self.features.add(
                    BasicBlock(in_channel=in_ch,
                               out_channel=out_ch,
                               spatial_stride=s_stride,
                               temporal_stride=t_stride))

            # avg pool
            self.features.add(nn.AvgPool3D(pool_size=(1, 7, 7)))

        # The output layer is created outside the name scope.
        self.output = nn.Dense(
            units=self.nclass,
            in_units=512,
            weight_initializer=init.Normal(sigma=self.init_std))
Example #13
0
    def __init__(self,
                 kernel,
                 n_layers,
                 feature_size,
                 device=None,
                 last=True,
                 connection='dense'):
        """Create the helper layers and the convolution stack selected by
        ``kernel``.

        Parameters
        ----------
        kernel : int or str
            ``1`` builds ``n_layers`` Conv3D layers with a [3, 1, 1] kernel,
            ``32`` builds them with a [3, 3, 3] kernel, and ``'x'`` builds
            ``n_layers`` pairs of dilated Conv2D encoder/decoder layers. Any
            other value creates no layer and fails on the first iteration
            when the (empty) ``self.d3`` list is indexed.
        n_layers : int
            Number of layers (or encoder/decoder pairs) to create.
        feature_size : int
            Channel count of the Conv3D layers; the Conv2D layers use
            ``int(feature_size / 2)`` channels.
        device, last
            Accepted but not read by this constructor.
        connection : str
            Stored on the instance.
        """
        super(D2, self).__init__()

        self.n_layers = n_layers
        self.connection = connection
        self.d3 = []
        self.c = feature_size
        self.k = 1
        self.kd2 = kernel  # kernel = 2D Kernel

        channels = self.c
        unit = self.k

        with self.name_scope():
            # Activations; `activation` and `relu` are two separate ReLUs.
            self.activation = nn.Activation('relu')
            self.tanh = nn.Activation('tanh')
            self.sigmoid = nn.Activation('sigmoid')
            self.relu = nn.Activation('relu')
            self.fc = nn.Dense(1, flatten=False)
            self.dropout = nn.Dropout(0.5)
            self.pool = nn.AvgPool3D([3, 1, 1], [2, 1, 1])

            if kernel != 'x':
                for _ in range(self.n_layers):
                    # (kernel_size, padding) of the Conv3D for this mode.
                    if kernel == 1:
                        conv_shape = ([3, unit, unit], [1, 0, 0])
                    elif kernel == 32:
                        conv_shape = ([3, 3, 3], [1, 1, 1])
                    else:
                        conv_shape = None

                    if conv_shape is not None:
                        kernel_size, padding = conv_shape
                        self.d3.append(
                            nn.Conv3D(channels, kernel_size, [1, 1, 1],
                                      padding, dilation=[1, 1, 1]))

                    # Layers kept in a plain list are registered by hand.
                    self.register_child(self.d3[-1])
            else:
                self.encoder = []
                self.decoder = []
                self.enorm = []
                self.dnorm = []

                for _ in range(n_layers):
                    # Encoder and decoder both use half the feature size.
                    half = int(channels / 2)

                    # 1x3 kernel with dilation 2 along the last axis; a
                    # padding of 2 keeps that axis' length unchanged.
                    encoder_conv = nn.Conv2D(half,
                                             kernel_size=[1, 3],
                                             strides=[1, 1],
                                             padding=[0, 2],
                                             dilation=[1, 2])
                    self.encoder.append(encoder_conv)

                    decoder_conv = nn.Conv2D(half,
                                             kernel_size=[1, 3],
                                             strides=[1, 1],
                                             padding=[0, 2],
                                             dilation=[1, 2])
                    self.decoder.append(decoder_conv)

                    self.register_child(encoder_conv)
                    self.register_child(decoder_conv)
Example #14
0
    def __init__(
        self,
        num_scenes,
        num_actions,
        model_depth,
        final_spatial_kernel=7,
        final_temporal_kernel=2,
        with_bias=False,
    ):
        """Assemble the shared backbone and the two task-specific heads.

        The constructor creates, in this order:

        * ``self.base``: two Conv3D -> BatchNorm -> ReLU groups (45 then 64
          channels).
        * ``self.conv2`` .. ``self.conv5``: four ``nn.Sequential`` stages of
          ``R3DBlock``s with 64, 128, 256 and 512 filters. The first block of
          conv3, conv4 and conv5 is built with ``downsampling=True``.
        * a "scene" head (``scene_conv``, ``scene_drop``, ``scene_output``).
        * an "action" head (``action_conv``, ``action_avg``,
          ``action_output``).

        Alongside the layers it fills ``self.base_name``,
        ``self.conv2_name`` .. ``self.conv5_name`` and ``self.dense0_name``
        with whatever ``set_base_name`` / ``add_comp_count_index`` return
        (presumably parameter names used for loading pretrained weights;
        both helpers are defined outside this method -- TODO confirm).

        Args:
            num_scenes: Number of units of the scene ``Dense`` output layer.
            num_actions: Number of units of the action ``Dense`` output layer.
            model_depth: Key looked up in ``BLOCK_CONFIG``; the entry must
                unpack into four block counts, one per stage conv2..conv5.
            final_spatial_kernel: Size used for the last two dimensions of
                the action head's ``AvgPool3D`` pooling window.
            final_temporal_kernel: Size used for the first dimension of the
                action head's ``AvgPool3D`` pooling window.
            with_bias: Forwarded as ``use_bias`` to the two stem ``Conv3D``
                layers and to every ``R3DBlock``. The head convolutions do
                not receive it.
        """
        super(R2Plus2D_MT, self).__init__()
        # Running index of the residual block being created; it is passed to
        # every R3DBlock / add_comp_count_index call and bumped after each one.
        self.comp_count = 0
        self.base = nn.Sequential(prefix='base_')
        with self.base.name_scope():
            # Stem: a (1, 7, 7) convolution followed by a (3, 1, 1)
            # convolution, each with BatchNorm + ReLU. Presumably this is the
            # spatial-then-temporal factorisation, assuming the three kernel
            # axes are (time, height, width) -- TODO confirm input layout.
            self.base.add(
                nn.Conv3D(channels=45,
                          kernel_size=(1, 7, 7),
                          strides=(1, 2, 2),
                          padding=(0, 3, 3),
                          use_bias=with_bias), nn.BatchNorm(),
                nn.Activation(activation='relu'),
                nn.Conv3D(channels=64,
                          kernel_size=(3, 1, 1),
                          strides=(1, 1, 1),
                          padding=(1, 0, 0),
                          use_bias=with_bias), nn.BatchNorm(),
                nn.Activation(activation='relu'))

        self.base_name = self.set_base_name()
        # Number of residual blocks in each of the four stages.
        (n2, n3, n4, n5) = BLOCK_CONFIG[model_depth]

        # conv2 stage: n2 blocks, 64 -> 64 filters, no downsampling.
        self.conv2_name = []
        self.conv2 = nn.Sequential(prefix='conv2_')
        with self.conv2.name_scope():
            for _ in range(n2):
                # NOTE(review): this is the only stage that passes `prefix=`
                # to add_comp_count_index; conv3..conv5 rely on the helper's
                # default. Confirm the difference is intentional.
                self.conv2_name.extend(
                    self.add_comp_count_index(change_channels=False,
                                              comp_index=self.comp_count,
                                              prefix=self.conv2.prefix))
                self.conv2.add(
                    R3DBlock(input_filter=64,
                             num_filter=64,
                             comp_index=self.comp_count,
                             use_bias=with_bias))
                self.comp_count += 1

        # conv3 stage: one downsampling block 64 -> 128, then n3 - 1 blocks
        # 128 -> 128.
        self.conv3_name = []
        self.conv3 = nn.Sequential(prefix='conv3_')
        with self.conv3.name_scope():
            # NOTE(review): debug output written to stdout on every
            # construction of the model.
            print("this in conv3 comp_count is ", self.comp_count)
            self.conv3_name.extend(
                self.add_comp_count_index(change_channels=True,
                                          downsampling=True,
                                          comp_index=self.comp_count))
            self.conv3.add(
                R3DBlock(input_filter=64,
                         num_filter=128,
                         comp_index=self.comp_count,
                         downsampling=True,
                         use_bias=with_bias))

            self.comp_count += 1
            for _ in range(n3 - 1):
                self.conv3_name.extend(
                    self.add_comp_count_index(change_channels=False,
                                              downsampling=False,
                                              comp_index=self.comp_count))
                self.conv3.add(
                    R3DBlock(input_filter=128,
                             num_filter=128,
                             comp_index=self.comp_count,
                             use_bias=with_bias))
                self.comp_count += 1

        # conv4 stage: one downsampling block 128 -> 256, then n4 - 1 blocks
        # 256 -> 256.
        self.conv4_name = []
        self.conv4 = nn.Sequential(prefix='conv4_')
        with self.conv4.name_scope():
            self.conv4_name.extend(
                self.add_comp_count_index(change_channels=True,
                                          downsampling=True,
                                          comp_index=self.comp_count))
            self.conv4.add(
                R3DBlock(128,
                         256,
                         comp_index=self.comp_count,
                         downsampling=True,
                         use_bias=with_bias))
            self.comp_count += 1

            for _ in range(n4 - 1):
                self.conv4_name.extend(
                    self.add_comp_count_index(change_channels=False,
                                              downsampling=False,
                                              comp_index=self.comp_count))
                self.conv4.add(
                    R3DBlock(256,
                             256,
                             comp_index=self.comp_count,
                             use_bias=with_bias))
                self.comp_count += 1

        # conv5 stage: one downsampling block 256 -> 512, then n5 - 1 blocks
        # 512 -> 512.
        self.conv5_name = []
        self.conv5 = nn.Sequential(prefix='conv5_')
        with self.conv5.name_scope():
            self.conv5_name.extend(
                self.add_comp_count_index(change_channels=True,
                                          downsampling=True,
                                          comp_index=self.comp_count))
            self.conv5.add(
                R3DBlock(256,
                         512,
                         comp_index=self.comp_count,
                         downsampling=True,
                         use_bias=with_bias))
            self.comp_count += 1
            for _ in range(n5 - 1):
                # NOTE(review): unlike conv3/conv4, change_channels and
                # downsampling are not passed here, so the helper's defaults
                # apply -- confirm they are both False. The block index is
                # also passed to R3DBlock positionally (third argument)
                # instead of as comp_index=.
                self.conv5_name.extend(
                    self.add_comp_count_index(comp_index=self.comp_count))
                self.conv5.add(
                    R3DBlock(512, 512, self.comp_count, use_bias=with_bias))
                self.comp_count += 1

        # final output of conv5 is [512,t/8,7,7] #512x1x7x7
        # (shape as stated by the original author; not verifiable from this
        # method alone)
        # for static scene tagging
        # Scene head: strided (1, 3, 3) convolution with 256 channels and no
        # padding, then BatchNorm + ReLU, dropout (rate 0.3) and a Dense
        # layer with num_scenes units. These layers are created outside any
        # explicit name_scope and do not receive `with_bias`.
        self.scene_conv = nn.Sequential()
        self.scene_conv.add(
            nn.Conv3D(256, kernel_size=(1, 3, 3), strides=(1, 2, 2)),
            nn.BatchNorm(),
            nn.Activation('relu'))  # shape 256*1*2*2 # reshape(1024)
        self.scene_drop = nn.Dropout(rate=0.3)
        self.scene_output = nn.Dense(num_scenes)

        # for action classification
        # Action head: stride-1 (1, 3, 3) convolution with 512 channels and
        # (0, 1, 1) padding, then BatchNorm + ReLU, average pooling over a
        # (final_temporal_kernel, final_spatial_kernel, final_spatial_kernel)
        # window and a Dense layer with num_actions units.
        self.action_conv = nn.Sequential()
        self.action_conv.add(
            nn.Conv3D(512,
                      kernel_size=(1, 3, 3),
                      strides=(1, 1, 1),
                      padding=(0, 1, 1)), nn.BatchNorm(),
            nn.Activation('relu'))
        self.action_avg = nn.AvgPool3D(pool_size=(final_temporal_kernel,
                                                  final_spatial_kernel,
                                                  final_spatial_kernel),
                                       strides=(1, 1, 1),
                                       padding=(0, 0, 0))

        self.action_output = nn.Dense(units=num_actions)
        # Hard-coded names for the final fully connected layer's weight and
        # bias (presumably keys in a pretrained checkpoint -- TODO confirm
        # which Dense layer they are meant to map onto).
        self.dense0_name = ['final_fc_weight', 'final_fc_bias']